Training in progress, step 2950, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_config.json +4 -4
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -26,13 +26,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
     "up_proj",
     "q_proj",
-    "o_proj",
     "k_proj",
-    "gate_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "down_proj",
+    "gate_proj",
     "up_proj",
     "q_proj",
+    "v_proj",
     "k_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:85e10776ed7d2feec702f85a92294fc572495be458fad36bc37e21242039a14d
 size 1370666272

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c2dded99c0ee7ca1abb8f2fabb96c5156afb103ab3ce6e1c5aaa23701b8648f
 size 1370666272

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f328674c88f0186255ee5dbf4ac7f148eb4bef19de18a361c1ed0eb9ce9660bb
 size 697294462

 version https://git-lfs.github.com/spec/v1
+oid sha256:b045aafc33f03191b84693af1956fd4ffbf7ae3916ccc75dfaad968038476fff
 size 697294462

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:605d9f0439096f21199e65a6f7490d22d8285df735f81d56920505482985be35
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e748c5d7e6180ed81327ab2a4f0165f8cdc32090ab49af955d587517fb12cdd7
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.6730880816989672,
   "eval_steps": 500,
-  "global_step": 2900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -20307,6 +20307,356 @@
       "learning_rate": 0.00019121161303315963,
       "loss": 0.8731,
       "step": 2900
     }
   ],
   "logging_steps": 1,
@@ -20326,7 +20676,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2872424773124096e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.6846930486248114,
   "eval_steps": 500,
+  "global_step": 2950,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00019121161303315963,
       "loss": 0.8731,
       "step": 2900
+    },
+    {
+      "epoch": 0.673320181037484,
+      "grad_norm": 0.4451583921909332,
+      "learning_rate": 0.00019120563323736343,
+      "loss": 0.8934,
+      "step": 2901
+    },
+    {
+      "epoch": 0.6735522803760009,
+      "grad_norm": 0.41901981830596924,
+      "learning_rate": 0.00019119965150144095,
+      "loss": 0.8637,
+      "step": 2902
+    },
+    {
+      "epoch": 0.6737843797145178,
+      "grad_norm": 0.42898762226104736,
+      "learning_rate": 0.00019119366782551937,
+      "loss": 0.8929,
+      "step": 2903
+    },
+    {
+      "epoch": 0.6740164790530347,
+      "grad_norm": 0.4139856994152069,
+      "learning_rate": 0.00019118768220972596,
+      "loss": 0.8958,
+      "step": 2904
+    },
+    {
+      "epoch": 0.6742485783915516,
+      "grad_norm": 0.4518340528011322,
+      "learning_rate": 0.0001911816946541881,
+      "loss": 0.884,
+      "step": 2905
+    },
+    {
+      "epoch": 0.6744806777300685,
+      "grad_norm": 0.4949742555618286,
+      "learning_rate": 0.00019117570515903313,
+      "loss": 0.9065,
+      "step": 2906
+    },
+    {
+      "epoch": 0.6747127770685853,
+      "grad_norm": 0.42285311222076416,
+      "learning_rate": 0.00019116971372438847,
+      "loss": 0.9126,
+      "step": 2907
+    },
+    {
+      "epoch": 0.6749448764071022,
+      "grad_norm": 0.46767348051071167,
+      "learning_rate": 0.00019116372035038153,
+      "loss": 0.8784,
+      "step": 2908
+    },
+    {
+      "epoch": 0.6751769757456191,
+      "grad_norm": 0.48399636149406433,
+      "learning_rate": 0.00019115772503713985,
+      "loss": 0.8913,
+      "step": 2909
+    },
+    {
+      "epoch": 0.6754090750841361,
+      "grad_norm": 0.44633030891418457,
+      "learning_rate": 0.00019115172778479093,
+      "loss": 0.8711,
+      "step": 2910
+    },
+    {
+      "epoch": 0.6756411744226529,
+      "grad_norm": 0.43487444519996643,
+      "learning_rate": 0.00019114572859346235,
+      "loss": 0.8847,
+      "step": 2911
+    },
+    {
+      "epoch": 0.6758732737611698,
+      "grad_norm": 0.3979194760322571,
+      "learning_rate": 0.00019113972746328178,
+      "loss": 0.849,
+      "step": 2912
+    },
+    {
+      "epoch": 0.6761053730996867,
+      "grad_norm": 0.4204396605491638,
+      "learning_rate": 0.0001911337243943768,
+      "loss": 0.8596,
+      "step": 2913
+    },
+    {
+      "epoch": 0.6763374724382035,
+      "grad_norm": 0.41835030913352966,
+      "learning_rate": 0.0001911277193868751,
+      "loss": 0.8431,
+      "step": 2914
+    },
+    {
+      "epoch": 0.6765695717767204,
+      "grad_norm": 0.4458625912666321,
+      "learning_rate": 0.00019112171244090452,
+      "loss": 0.8341,
+      "step": 2915
+    },
+    {
+      "epoch": 0.6768016711152374,
+      "grad_norm": 0.4265308976173401,
+      "learning_rate": 0.0001911157035565927,
+      "loss": 0.8193,
+      "step": 2916
+    },
+    {
+      "epoch": 0.6770337704537542,
+      "grad_norm": 0.4003806412220001,
+      "learning_rate": 0.0001911096927340676,
+      "loss": 0.8821,
+      "step": 2917
+    },
+    {
+      "epoch": 0.6772658697922711,
+      "grad_norm": 0.44573527574539185,
+      "learning_rate": 0.00019110367997345697,
+      "loss": 0.864,
+      "step": 2918
+    },
+    {
+      "epoch": 0.677497969130788,
+      "grad_norm": 0.4213849902153015,
+      "learning_rate": 0.00019109766527488877,
+      "loss": 0.8711,
+      "step": 2919
+    },
+    {
+      "epoch": 0.6777300684693048,
+      "grad_norm": 0.41736915707588196,
+      "learning_rate": 0.00019109164863849096,
+      "loss": 0.8666,
+      "step": 2920
+    },
+    {
+      "epoch": 0.6779621678078217,
+      "grad_norm": 0.4173840284347534,
+      "learning_rate": 0.00019108563006439147,
+      "loss": 0.8964,
+      "step": 2921
+    },
+    {
+      "epoch": 0.6781942671463387,
+      "grad_norm": 0.4290173649787903,
+      "learning_rate": 0.00019107960955271836,
+      "loss": 0.8684,
+      "step": 2922
+    },
+    {
+      "epoch": 0.6784263664848555,
+      "grad_norm": 0.4732690751552582,
+      "learning_rate": 0.0001910735871035997,
+      "loss": 0.844,
+      "step": 2923
+    },
+    {
+      "epoch": 0.6786584658233724,
+      "grad_norm": 0.44380733370780945,
+      "learning_rate": 0.00019106756271716362,
+      "loss": 0.8779,
+      "step": 2924
+    },
+    {
+      "epoch": 0.6788905651618893,
+      "grad_norm": 0.4828498959541321,
+      "learning_rate": 0.00019106153639353822,
+      "loss": 0.8606,
+      "step": 2925
+    },
+    {
+      "epoch": 0.6791226645004061,
+      "grad_norm": 0.4402746260166168,
+      "learning_rate": 0.00019105550813285175,
+      "loss": 0.8463,
+      "step": 2926
+    },
+    {
+      "epoch": 0.679354763838923,
+      "grad_norm": 0.44497203826904297,
+      "learning_rate": 0.00019104947793523234,
+      "loss": 0.8601,
+      "step": 2927
+    },
+    {
+      "epoch": 0.67958686317744,
+      "grad_norm": 0.44765856862068176,
+      "learning_rate": 0.00019104344580080838,
+      "loss": 0.8867,
+      "step": 2928
+    },
+    {
+      "epoch": 0.6798189625159569,
+      "grad_norm": 0.43054118752479553,
+      "learning_rate": 0.00019103741172970818,
+      "loss": 0.8119,
+      "step": 2929
+    },
+    {
+      "epoch": 0.6800510618544737,
+      "grad_norm": 0.555328369140625,
+      "learning_rate": 0.00019103137572206,
+      "loss": 0.8219,
+      "step": 2930
+    },
+    {
+      "epoch": 0.6802831611929906,
+      "grad_norm": 0.45921704173088074,
+      "learning_rate": 0.0001910253377779923,
+      "loss": 0.8887,
+      "step": 2931
+    },
+    {
+      "epoch": 0.6805152605315075,
+      "grad_norm": 0.4183528423309326,
+      "learning_rate": 0.00019101929789763354,
+      "loss": 0.885,
+      "step": 2932
+    },
+    {
+      "epoch": 0.6807473598700243,
+      "grad_norm": 0.4342934787273407,
+      "learning_rate": 0.00019101325608111218,
+      "loss": 0.9084,
+      "step": 2933
+    },
+    {
+      "epoch": 0.6809794592085413,
+      "grad_norm": 0.41013672947883606,
+      "learning_rate": 0.0001910072123285567,
+      "loss": 0.8773,
+      "step": 2934
+    },
+    {
+      "epoch": 0.6812115585470582,
+      "grad_norm": 0.4397852122783661,
+      "learning_rate": 0.00019100116664009576,
+      "loss": 0.8478,
+      "step": 2935
+    },
+    {
+      "epoch": 0.681443657885575,
+      "grad_norm": 0.46658027172088623,
+      "learning_rate": 0.00019099511901585786,
+      "loss": 0.8682,
+      "step": 2936
+    },
+    {
+      "epoch": 0.6816757572240919,
+      "grad_norm": 0.4161824584007263,
+      "learning_rate": 0.00019098906945597168,
+      "loss": 0.8447,
+      "step": 2937
+    },
+    {
+      "epoch": 0.6819078565626088,
+      "grad_norm": 0.45820096135139465,
+      "learning_rate": 0.00019098301796056593,
+      "loss": 0.8632,
+      "step": 2938
+    },
+    {
+      "epoch": 0.6821399559011256,
+      "grad_norm": 0.49335211515426636,
+      "learning_rate": 0.00019097696452976935,
+      "loss": 0.8543,
+      "step": 2939
+    },
+    {
+      "epoch": 0.6823720552396426,
+      "grad_norm": 0.5060347318649292,
+      "learning_rate": 0.00019097090916371062,
+      "loss": 0.9283,
+      "step": 2940
+    },
+    {
+      "epoch": 0.6826041545781595,
+      "grad_norm": 0.5007983446121216,
+      "learning_rate": 0.00019096485186251866,
+      "loss": 0.8542,
+      "step": 2941
+    },
+    {
+      "epoch": 0.6828362539166764,
+      "grad_norm": 0.5087704062461853,
+      "learning_rate": 0.00019095879262632227,
+      "loss": 0.8908,
+      "step": 2942
+    },
+    {
+      "epoch": 0.6830683532551932,
+      "grad_norm": 0.5069675445556641,
+      "learning_rate": 0.0001909527314552503,
+      "loss": 0.9079,
+      "step": 2943
+    },
+    {
+      "epoch": 0.6833004525937101,
+      "grad_norm": 0.47137320041656494,
+      "learning_rate": 0.00019094666834943179,
+      "loss": 0.8626,
+      "step": 2944
+    },
+    {
+      "epoch": 0.683532551932227,
+      "grad_norm": 0.4283658564090729,
+      "learning_rate": 0.0001909406033089956,
+      "loss": 0.8541,
+      "step": 2945
+    },
+    {
+      "epoch": 0.6837646512707439,
+      "grad_norm": 0.46082451939582825,
+      "learning_rate": 0.00019093453633407082,
+      "loss": 0.8143,
+      "step": 2946
+    },
+    {
+      "epoch": 0.6839967506092608,
+      "grad_norm": 0.4551635682582855,
+      "learning_rate": 0.00019092846742478647,
+      "loss": 0.8945,
+      "step": 2947
+    },
+    {
+      "epoch": 0.6842288499477777,
+      "grad_norm": 0.5660843253135681,
+      "learning_rate": 0.00019092239658127167,
+      "loss": 0.8522,
+      "step": 2948
+    },
+    {
+      "epoch": 0.6844609492862945,
+      "grad_norm": 0.481251060962677,
+      "learning_rate": 0.00019091632380365553,
+      "loss": 0.8549,
+      "step": 2949
+    },
+    {
+      "epoch": 0.6846930486248114,
+      "grad_norm": 0.45565807819366455,
+      "learning_rate": 0.00019091024909206729,
+      "loss": 0.8892,
+      "step": 2950
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.3094363131281408e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null