Training in progress, step 1900, checkpoint
last-checkpoint/adapter_config.json CHANGED
```diff
@@ -26,13 +26,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "gate_proj",
-    "down_proj",
-    "q_proj",
     "v_proj",
+    "up_proj",
+    "q_proj",
     "o_proj",
-    "up_proj"
+    "k_proj",
+    "gate_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
```
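For context: `target_modules` in a PEFT `adapter_config.json` names the layers that receive LoRA adapters. The change here only reorders the list; the same seven projection modules appear on both sides. Below is a minimal sketch of how such a config is expressed with the `peft` library; the `r` and `lora_alpha` values are placeholders, since the diff does not show them.

```python
# Hypothetical reconstruction of this adapter's LoRA configuration.
# Only target_modules, task_type, and use_dora are taken from the diff;
# r and lora_alpha are assumed placeholder values.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,            # assumed rank, not visible in the diff
    lora_alpha=32,   # assumed scaling factor, not visible in the diff
    target_modules=[
        "v_proj", "up_proj", "q_proj", "o_proj",
        "k_proj", "gate_proj", "down_proj",
    ],
    task_type="CAUSAL_LM",
    use_dora=False,
)
```

Since `target_modules` is effectively a set, a reordering like this typically comes from re-serializing the config and has no effect on which layers are adapted.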
last-checkpoint/adapter_model.safetensors CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0f76dbb9a45ae718cb4c4ffa542564fbd46a97583f94b15b7d3e80c39275a70f
 size 1370666272
```
last-checkpoint/optimizer.pt CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0fb3c2e24267fa356ea44dc14e7953e417fa1d6dd44f526c4daea1bcf6b647b7
 size 697294462
```
last-checkpoint/scheduler.pt CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7e3dad5e9640794d19b0f41e34b58f722c69f08c60cfeb247e583e12e03c10e0
 size 1064
```
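The three binary files above are stored with Git LFS, so the repository tracks only a small pointer per blob: a spec version, the SHA-256 of the content (`oid`), and the byte size. Each checkpoint save rewrites the `oid` while the sizes stay the same here. A small sketch, not part of the repo, for verifying a downloaded file against its pointer:

```python
# Check a downloaded checkpoint file against its Git LFS pointer:
# the pointer records only the content's SHA-256 and byte size,
# so both can be recomputed locally. Paths are illustrative.
import hashlib

def verify_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# Example with the scheduler pointer from the diff above:
print(verify_lfs_pointer(
    "last-checkpoint/scheduler.pt",
    "7e3dad5e9640794d19b0f41e34b58f722c69f08c60cfeb247e583e12e03c10e0",
    1064,
))
```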
last-checkpoint/trainer_state.json CHANGED
```diff
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.4409887431820819,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 1900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12957,6 +12957,356 @@
       "learning_rate": 0.00019639739459366182,
       "loss": 0.9533,
       "step": 1850
+    },
+    {
+      "epoch": 0.42961587559475456,
+      "grad_norm": 0.47029098868370056,
+      "learning_rate": 0.0001963935140270619,
+      "loss": 0.8904,
+      "step": 1851
+    },
+    {
+      "epoch": 0.4298479749332714,
+      "grad_norm": 0.5885578393936157,
+      "learning_rate": 0.00019638963140997906,
+      "loss": 0.8957,
+      "step": 1852
+    },
+    {
+      "epoch": 0.43008007427178835,
+      "grad_norm": 0.5930177569389343,
+      "learning_rate": 0.00019638574674249587,
+      "loss": 1.0084,
+      "step": 1853
+    },
+    {
+      "epoch": 0.4303121736103052,
+      "grad_norm": 0.5558833479881287,
+      "learning_rate": 0.00019638186002469494,
+      "loss": 0.9102,
+      "step": 1854
+    },
+    {
+      "epoch": 0.4305442729488221,
+      "grad_norm": 0.5855537056922913,
+      "learning_rate": 0.000196377971256659,
+      "loss": 0.9468,
+      "step": 1855
+    },
+    {
+      "epoch": 0.430776372287339,
+      "grad_norm": 0.5754596590995789,
+      "learning_rate": 0.00019637408043847074,
+      "loss": 0.9044,
+      "step": 1856
+    },
+    {
+      "epoch": 0.43100847162585587,
+      "grad_norm": 0.584676206111908,
+      "learning_rate": 0.00019637018757021296,
+      "loss": 0.8508,
+      "step": 1857
+    },
+    {
+      "epoch": 0.43124057096437274,
+      "grad_norm": 0.49439355731010437,
+      "learning_rate": 0.0001963662926519684,
+      "loss": 0.8681,
+      "step": 1858
+    },
+    {
+      "epoch": 0.43147267030288966,
+      "grad_norm": 0.5786611437797546,
+      "learning_rate": 0.00019636239568382,
+      "loss": 0.9083,
+      "step": 1859
+    },
+    {
+      "epoch": 0.4317047696414065,
+      "grad_norm": 0.5431936383247375,
+      "learning_rate": 0.00019635849666585058,
+      "loss": 0.9406,
+      "step": 1860
+    },
+    {
+      "epoch": 0.4319368689799234,
+      "grad_norm": 0.6521342992782593,
+      "learning_rate": 0.00019635459559814314,
+      "loss": 0.8659,
+      "step": 1861
+    },
+    {
+      "epoch": 0.4321689683184403,
+      "grad_norm": 0.5077570676803589,
+      "learning_rate": 0.00019635069248078062,
+      "loss": 0.9172,
+      "step": 1862
+    },
+    {
+      "epoch": 0.4324010676569572,
+      "grad_norm": 0.5636994242668152,
+      "learning_rate": 0.00019634678731384608,
+      "loss": 0.9587,
+      "step": 1863
+    },
+    {
+      "epoch": 0.43263316699547405,
+      "grad_norm": 0.48513078689575195,
+      "learning_rate": 0.00019634288009742255,
+      "loss": 0.9519,
+      "step": 1864
+    },
+    {
+      "epoch": 0.432865266333991,
+      "grad_norm": 0.519437849521637,
+      "learning_rate": 0.00019633897083159318,
+      "loss": 0.9289,
+      "step": 1865
+    },
+    {
+      "epoch": 0.43309736567250784,
+      "grad_norm": 0.5995944738388062,
+      "learning_rate": 0.00019633505951644113,
+      "loss": 0.9566,
+      "step": 1866
+    },
+    {
+      "epoch": 0.4333294650110247,
+      "grad_norm": 0.5057395100593567,
+      "learning_rate": 0.00019633114615204958,
+      "loss": 0.9654,
+      "step": 1867
+    },
+    {
+      "epoch": 0.43356156434954163,
+      "grad_norm": 0.5791558623313904,
+      "learning_rate": 0.00019632723073850176,
+      "loss": 0.9469,
+      "step": 1868
+    },
+    {
+      "epoch": 0.4337936636880585,
+      "grad_norm": 0.5840992331504822,
+      "learning_rate": 0.000196323313275881,
+      "loss": 0.918,
+      "step": 1869
+    },
+    {
+      "epoch": 0.43402576302657536,
+      "grad_norm": 0.550893247127533,
+      "learning_rate": 0.00019631939376427062,
+      "loss": 0.8612,
+      "step": 1870
+    },
+    {
+      "epoch": 0.4342578623650923,
+      "grad_norm": 0.537064790725708,
+      "learning_rate": 0.00019631547220375398,
+      "loss": 0.9316,
+      "step": 1871
+    },
+    {
+      "epoch": 0.43448996170360915,
+      "grad_norm": 0.5622636675834656,
+      "learning_rate": 0.00019631154859441454,
+      "loss": 0.8822,
+      "step": 1872
+    },
+    {
+      "epoch": 0.434722061042126,
+      "grad_norm": 0.599727213382721,
+      "learning_rate": 0.0001963076229363357,
+      "loss": 0.956,
+      "step": 1873
+    },
+    {
+      "epoch": 0.43495416038064294,
+      "grad_norm": 0.5084268450737,
+      "learning_rate": 0.00019630369522960104,
+      "loss": 0.8993,
+      "step": 1874
+    },
+    {
+      "epoch": 0.4351862597191598,
+      "grad_norm": 0.547834038734436,
+      "learning_rate": 0.00019629976547429402,
+      "loss": 0.9046,
+      "step": 1875
+    },
+    {
+      "epoch": 0.4354183590576767,
+      "grad_norm": 0.5189753770828247,
+      "learning_rate": 0.0001962958336704983,
+      "loss": 0.8458,
+      "step": 1876
+    },
+    {
+      "epoch": 0.4356504583961936,
+      "grad_norm": 0.501224160194397,
+      "learning_rate": 0.00019629189981829753,
+      "loss": 0.905,
+      "step": 1877
+    },
+    {
+      "epoch": 0.43588255773471046,
+      "grad_norm": 0.5444706082344055,
+      "learning_rate": 0.0001962879639177753,
+      "loss": 0.8975,
+      "step": 1878
+    },
+    {
+      "epoch": 0.43611465707322733,
+      "grad_norm": 0.5328624248504639,
+      "learning_rate": 0.00019628402596901545,
+      "loss": 0.9257,
+      "step": 1879
+    },
+    {
+      "epoch": 0.43634675641174425,
+      "grad_norm": 0.5254698991775513,
+      "learning_rate": 0.00019628008597210168,
+      "loss": 0.8739,
+      "step": 1880
+    },
+    {
+      "epoch": 0.4365788557502611,
+      "grad_norm": 0.5245271921157837,
+      "learning_rate": 0.0001962761439271178,
+      "loss": 0.8952,
+      "step": 1881
+    },
+    {
+      "epoch": 0.436810955088778,
+      "grad_norm": 0.5154178142547607,
+      "learning_rate": 0.00019627219983414768,
+      "loss": 0.9408,
+      "step": 1882
+    },
+    {
+      "epoch": 0.4370430544272949,
+      "grad_norm": 0.5660544037818909,
+      "learning_rate": 0.00019626825369327525,
+      "loss": 0.8846,
+      "step": 1883
+    },
+    {
+      "epoch": 0.4372751537658118,
+      "grad_norm": 0.5544506907463074,
+      "learning_rate": 0.0001962643055045844,
+      "loss": 0.9322,
+      "step": 1884
+    },
+    {
+      "epoch": 0.43750725310432864,
+      "grad_norm": 0.49590614438056946,
+      "learning_rate": 0.00019626035526815912,
+      "loss": 0.9737,
+      "step": 1885
+    },
+    {
+      "epoch": 0.43773935244284556,
+      "grad_norm": 0.5184259414672852,
+      "learning_rate": 0.0001962564029840835,
+      "loss": 0.9169,
+      "step": 1886
+    },
+    {
+      "epoch": 0.43797145178136243,
+      "grad_norm": 0.5171828866004944,
+      "learning_rate": 0.00019625244865244156,
+      "loss": 0.8724,
+      "step": 1887
+    },
+    {
+      "epoch": 0.4382035511198793,
+      "grad_norm": 0.606625497341156,
+      "learning_rate": 0.0001962484922733174,
+      "loss": 0.8666,
+      "step": 1888
+    },
+    {
+      "epoch": 0.4384356504583962,
+      "grad_norm": 0.5377411842346191,
+      "learning_rate": 0.0001962445338467952,
+      "loss": 0.9142,
+      "step": 1889
+    },
+    {
+      "epoch": 0.4386677497969131,
+      "grad_norm": 0.5942894220352173,
+      "learning_rate": 0.00019624057337295922,
+      "loss": 0.957,
+      "step": 1890
+    },
+    {
+      "epoch": 0.43889984913542995,
+      "grad_norm": 0.5858636498451233,
+      "learning_rate": 0.00019623661085189364,
+      "loss": 0.9022,
+      "step": 1891
+    },
+    {
+      "epoch": 0.4391319484739469,
+      "grad_norm": 0.5353084206581116,
+      "learning_rate": 0.00019623264628368275,
+      "loss": 0.8723,
+      "step": 1892
+    },
+    {
+      "epoch": 0.43936404781246374,
+      "grad_norm": 0.5895339846611023,
+      "learning_rate": 0.0001962286796684109,
+      "loss": 0.9509,
+      "step": 1893
+    },
+    {
+      "epoch": 0.4395961471509806,
+      "grad_norm": 0.5124474763870239,
+      "learning_rate": 0.0001962247110061625,
+      "loss": 0.9523,
+      "step": 1894
+    },
+    {
+      "epoch": 0.43982824648949753,
+      "grad_norm": 0.53212571144104,
+      "learning_rate": 0.00019622074029702194,
+      "loss": 0.8931,
+      "step": 1895
+    },
+    {
+      "epoch": 0.4400603458280144,
+      "grad_norm": 0.4760664999485016,
+      "learning_rate": 0.00019621676754107367,
+      "loss": 0.9609,
+      "step": 1896
+    },
+    {
+      "epoch": 0.44029244516653127,
+      "grad_norm": 0.4855426549911499,
+      "learning_rate": 0.0001962127927384022,
+      "loss": 0.9561,
+      "step": 1897
+    },
+    {
+      "epoch": 0.4405245445050482,
+      "grad_norm": 0.6112794876098633,
+      "learning_rate": 0.00019620881588909212,
+      "loss": 0.9166,
+      "step": 1898
+    },
+    {
+      "epoch": 0.44075664384356505,
+      "grad_norm": 0.5399686098098755,
+      "learning_rate": 0.00019620483699322802,
+      "loss": 0.8998,
+      "step": 1899
+    },
+    {
+      "epoch": 0.4409887431820819,
+      "grad_norm": 0.5019717216491699,
+      "learning_rate": 0.00019620085605089448,
+      "loss": 0.8652,
+      "step": 1900
     }
   ],
   "logging_steps": 1,
@@ -12976,7 +13326,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.
+  "total_flos": 8.433657609977856e+17,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null
```
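`trainer_state.json` carries the Trainer's running log: this commit advances `global_step` from 1850 to 1900 (epoch ≈ 0.441 of the first pass over the data) and appends one `log_history` entry per step, since `logging_steps` is 1. A short sketch for inspecting that log after downloading the checkpoint; it assumes only the layout visible in the diff above:

```python
# Summarize the training log stored in the checkpoint's trainer_state.json.
# Assumes the structure shown above: top-level "global_step"/"epoch" and a
# "log_history" list of per-step dicts with "loss", "learning_rate", etc.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(f"global_step={state['global_step']}, epoch={state['epoch']:.4f}")

# Mean loss over the 50 entries added in this commit (steps 1851-1900).
recent = [e for e in state["log_history"][-50:] if "loss" in e]
mean_loss = sum(e["loss"] for e in recent) / len(recent)
print(f"mean loss over last {len(recent)} logged steps: {mean_loss:.4f}")
```

Over these 50 steps the loss hovers around 0.9 while the learning rate decays only slightly (1.96394e-4 → 1.96201e-4), consistent with being early in a long schedule.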