nkasmanoff
/

nature-buddy

@@ -1,8 +1,8 @@
 {
     "epoch": 5.0,
-    "total_flos": 2990143010382336.0,
-    "train_loss": 0.5055735324683629,
-    "train_runtime": 417.7958,
-    "train_samples_per_second": 38.344,
-    "train_steps_per_second": 4.799
 }

 {
     "epoch": 5.0,
+    "total_flos": 6581450258638848.0,
+    "train_loss": 0.5889826508769839,
+    "train_runtime": 806.0376,
+    "train_samples_per_second": 31.5,
+    "train_steps_per_second": 3.939
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 5.0,
-    "total_flos": 2990143010382336.0,
-    "train_loss": 0.5055735324683629,
-    "train_runtime": 417.7958,
-    "train_samples_per_second": 38.344,
-    "train_steps_per_second": 4.799
 }

 {
     "epoch": 5.0,
+    "total_flos": 6581450258638848.0,
+    "train_loss": 0.5889826508769839,
+    "train_runtime": 806.0376,
+    "train_samples_per_second": 31.5,
+    "train_steps_per_second": 3.939
 }

trainer_state.json CHANGED Viewed

@@ -3,163 +3,240 @@
   "best_model_checkpoint": null,
   "epoch": 5.0,
   "eval_steps": 500,
-  "global_step": 2005,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.24937655860349128,
-      "grad_norm": 1.2734375,
-      "learning_rate": 0.0004950495049504951,
-      "loss": 1.4705,
       "step": 100
     },
     {
-      "epoch": 0.49875311720698257,
-      "grad_norm": 1.265625,
-      "learning_rate": 0.0004966720227318671,
-      "loss": 0.9437,
       "step": 200
     },
     {
-      "epoch": 0.7481296758104738,
-      "grad_norm": 1.078125,
-      "learning_rate": 0.0004866439935228541,
-      "loss": 0.8591,
       "step": 300
     },
     {
-      "epoch": 0.9975062344139651,
-      "grad_norm": 1.03125,
-      "learning_rate": 0.00047018796504965955,
-      "loss": 0.8111,
       "step": 400
     },
     {
-      "epoch": 1.2468827930174564,
-      "grad_norm": 0.8359375,
-      "learning_rate": 0.0004477509350887423,
-      "loss": 0.574,
       "step": 500
     },
     {
-      "epoch": 1.4962593516209477,
-      "grad_norm": 1.0234375,
-      "learning_rate": 0.0004199423643322254,
-      "loss": 0.5806,
       "step": 600
     },
     {
-      "epoch": 1.745635910224439,
-      "grad_norm": 1.0078125,
-      "learning_rate": 0.00038751762150804385,
-      "loss": 0.5699,
       "step": 700
     },
     {
-      "epoch": 1.9950124688279303,
-      "grad_norm": 0.8046875,
-      "learning_rate": 0.00035135746517639757,
-      "loss": 0.5553,
       "step": 800
     },
     {
-      "epoch": 2.2443890274314215,
-      "grad_norm": 0.74609375,
-      "learning_rate": 0.00031244411954180673,
-      "loss": 0.3753,
       "step": 900
     },
     {
-      "epoch": 2.493765586034913,
-      "grad_norm": 0.91796875,
-      "learning_rate": 0.00027183459413737774,
-      "loss": 0.374,
       "step": 1000
     },
     {
-      "epoch": 2.743142144638404,
-      "grad_norm": 1.0546875,
-      "learning_rate": 0.00023063197210303058,
-      "loss": 0.3746,
       "step": 1100
     },
     {
-      "epoch": 2.9925187032418954,
-      "grad_norm": 1.0625,
-      "learning_rate": 0.00018995544695885592,
-      "loss": 0.3789,
       "step": 1200
     },
     {
-      "epoch": 3.2418952618453867,
-      "grad_norm": 0.94140625,
-      "learning_rate": 0.0001509099217695958,
-      "loss": 0.2943,
       "step": 1300
     },
     {
-      "epoch": 3.491271820448878,
-      "grad_norm": 0.81640625,
-      "learning_rate": 0.00011455599648308674,
-      "loss": 0.288,
       "step": 1400
     },
     {
-      "epoch": 3.7406483790523692,
-      "grad_norm": 0.78515625,
-      "learning_rate": 8.18811586814684e-05,
-      "loss": 0.2878,
       "step": 1500
     },
     {
-      "epoch": 3.9900249376558605,
-      "grad_norm": 0.82421875,
-      "learning_rate": 5.377296029546741e-05,
-      "loss": 0.2929,
       "step": 1600
     },
     {
-      "epoch": 4.239401496259352,
-      "grad_norm": 0.8203125,
-      "learning_rate": 3.099490888702508e-05,
-      "loss": 0.2837,
       "step": 1700
     },
     {
-      "epoch": 4.488778054862843,
-      "grad_norm": 0.8125,
-      "learning_rate": 1.4165728369278874e-05,
-      "loss": 0.2715,
       "step": 1800
     },
     {
-      "epoch": 4.738154613466334,
-      "grad_norm": 0.85546875,
-      "learning_rate": 3.7425525083322754e-06,
-      "loss": 0.268,
       "step": 1900
     },
     {
-      "epoch": 4.987531172069826,
-      "grad_norm": 0.88671875,
-      "learning_rate": 8.507724455514287e-09,
-      "loss": 0.2712,
       "step": 2000
     },
     {
       "epoch": 5.0,
-      "step": 2005,
-      "total_flos": 2990143010382336.0,
-      "train_loss": 0.5055735324683629,
-      "train_runtime": 417.7958,
-      "train_samples_per_second": 38.344,
-      "train_steps_per_second": 4.799
     }
   ],
   "logging_steps": 100,
-  "max_steps": 2005,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 500,
@@ -175,7 +252,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2990143010382336.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_model_checkpoint": null,
   "epoch": 5.0,
   "eval_steps": 500,
+  "global_step": 3175,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.15748031496062992,
+      "grad_norm": 1.953125,
+      "learning_rate": 0.00031446540880503143,
+      "loss": 1.7301,
       "step": 100
     },
     {
+      "epoch": 0.31496062992125984,
+      "grad_norm": 1.1328125,
+      "learning_rate": 0.0004997720451762572,
+      "loss": 1.1137,
       "step": 200
     },
     {
+      "epoch": 0.47244094488188976,
+      "grad_norm": 1.28125,
+      "learning_rate": 0.0004973084374349976,
+      "loss": 1.0141,
       "step": 300
     },
     {
+      "epoch": 0.6299212598425197,
+      "grad_norm": 1.078125,
+      "learning_rate": 0.0004921639131931859,
+      "loss": 0.9538,
       "step": 400
     },
     {
+      "epoch": 0.7874015748031497,
+      "grad_norm": 0.9453125,
+      "learning_rate": 0.00048439424102900066,
+      "loss": 0.9061,
       "step": 500
     },
     {
+      "epoch": 0.9448818897637795,
+      "grad_norm": 1.0859375,
+      "learning_rate": 0.00047408364711169396,
+      "loss": 0.8785,
       "step": 600
     },
     {
+      "epoch": 1.1023622047244095,
+      "grad_norm": 0.83984375,
+      "learning_rate": 0.00046134390215823,
+      "loss": 0.7189,
       "step": 700
     },
     {
+      "epoch": 1.2598425196850394,
+      "grad_norm": 0.953125,
+      "learning_rate": 0.00044631310979666443,
+      "loss": 0.6703,
       "step": 800
     },
     {
+      "epoch": 1.4173228346456692,
+      "grad_norm": 0.9296875,
+      "learning_rate": 0.0004291542094708612,
+      "loss": 0.6648,
       "step": 900
     },
     {
+      "epoch": 1.574803149606299,
+      "grad_norm": 1.2890625,
+      "learning_rate": 0.000410053210115622,
+      "loss": 0.6524,
       "step": 1000
     },
     {
+      "epoch": 1.7322834645669292,
+      "grad_norm": 1.03125,
+      "learning_rate": 0.00038921717374985584,
+      "loss": 0.6622,
       "step": 1100
     },
     {
+      "epoch": 1.889763779527559,
+      "grad_norm": 0.96484375,
+      "learning_rate": 0.0003668719708463959,
+      "loss": 0.6379,
       "step": 1200
     },
     {
+      "epoch": 2.047244094488189,
+      "grad_norm": 1.0234375,
+      "learning_rate": 0.00034325983181110047,
+      "loss": 0.5745,
       "step": 1300
     },
     {
+      "epoch": 2.204724409448819,
+      "grad_norm": 1.0546875,
+      "learning_rate": 0.00031863672111412524,
+      "loss": 0.4619,
       "step": 1400
     },
     {
+      "epoch": 2.362204724409449,
+      "grad_norm": 1.203125,
+      "learning_rate": 0.00029326956253877123,
+      "loss": 0.4808,
       "step": 1500
     },
     {
+      "epoch": 2.5196850393700787,
+      "grad_norm": 0.9921875,
+      "learning_rate": 0.00026743334562725617,
+      "loss": 0.4598,
       "step": 1600
     },
     {
+      "epoch": 2.677165354330709,
+      "grad_norm": 1.109375,
+      "learning_rate": 0.00024140814469062377,
+      "loss": 0.4687,
       "step": 1700
     },
     {
+      "epoch": 2.8346456692913384,
+      "grad_norm": 0.98046875,
+      "learning_rate": 0.0002154760826978469,
+      "loss": 0.4703,
       "step": 1800
     },
     {
+      "epoch": 2.9921259842519685,
+      "grad_norm": 1.0625,
+      "learning_rate": 0.00018991827295670777,
+      "loss": 0.4558,
       "step": 1900
     },
     {
+      "epoch": 3.1496062992125986,
+      "grad_norm": 1.046875,
+      "learning_rate": 0.00016501177173978493,
+      "loss": 0.3817,
       "step": 2000
     },
+    {
+      "epoch": 3.3070866141732282,
+      "grad_norm": 1.0859375,
+      "learning_rate": 0.00014102657489022886,
+      "loss": 0.3786,
+      "step": 2100
+    },
+    {
+      "epoch": 3.4645669291338583,
+      "grad_norm": 0.8671875,
+      "learning_rate": 0.00011822269096524812,
+      "loss": 0.3779,
+      "step": 2200
+    },
+    {
+      "epoch": 3.622047244094488,
+      "grad_norm": 1.328125,
+      "learning_rate": 9.684732264553247e-05,
+      "loss": 0.3815,
+      "step": 2300
+    },
+    {
+      "epoch": 3.779527559055118,
+      "grad_norm": 0.85546875,
+      "learning_rate": 7.713218696519558e-05,
+      "loss": 0.3859,
+      "step": 2400
+    },
+    {
+      "epoch": 3.937007874015748,
+      "grad_norm": 1.0234375,
+      "learning_rate": 5.929100341195187e-05,
+      "loss": 0.3779,
+      "step": 2500
+    },
+    {
+      "epoch": 4.094488188976378,
+      "grad_norm": 0.765625,
+      "learning_rate": 4.351717712746703e-05,
+      "loss": 0.3608,
+      "step": 2600
+    },
+    {
+      "epoch": 4.251968503937007,
+      "grad_norm": 1.0546875,
+      "learning_rate": 2.9981702322862735e-05,
+      "loss": 0.3582,
+      "step": 2700
+    },
+    {
+      "epoch": 4.409448818897638,
+      "grad_norm": 0.9375,
+      "learning_rate": 1.8831308637139e-05,
+      "loss": 0.3731,
+      "step": 2800
+    },
+    {
+      "epoch": 4.566929133858268,
+      "grad_norm": 0.9296875,
+      "learning_rate": 1.0186870532686742e-05,
+      "loss": 0.3588,
+      "step": 2900
+    },
+    {
+      "epoch": 4.724409448818898,
+      "grad_norm": 0.71875,
+      "learning_rate": 4.1420969706420505e-06,
+      "loss": 0.3537,
+      "step": 3000
+    },
+    {
+      "epoch": 4.881889763779528,
+      "grad_norm": 0.83203125,
+      "learning_rate": 7.625155704936715e-07,
+      "loss": 0.3614,
+      "step": 3100
+    },
     {
       "epoch": 5.0,
+      "step": 3175,
+      "total_flos": 6581450258638848.0,
+      "train_loss": 0.5889826508769839,
+      "train_runtime": 806.0376,
+      "train_samples_per_second": 31.5,
+      "train_steps_per_second": 3.939
     }
   ],
   "logging_steps": 100,
+  "max_steps": 3175,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 6581450258638848.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null