Model save

Browse files

Files changed (8) hide show

README.md +5 -7
adapter_config.json +3 -3
adapter_model.safetensors +1 -1
all_results.json +6 -6
runs/Jun10_04-56-56_48ddfe8e991f/events.out.tfevents.1717995436.48ddfe8e991f.131950.0 +3 -0
train_results.json +6 -6
trainer_state.json +12 -140
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -2,13 +2,13 @@
 license: gemma
 library_name: peft
 tags:
-- alignment-handbook
 - trl
 - sft
 - generated_from_trainer
 base_model: google/gemma-2b
 datasets:
-- llama-duo/synth_summarize_dataset_dedup
 model-index:
 - name: gemma2b-summarize-gemini1_5flash-64k
   results: []
@@ -19,9 +19,9 @@ should probably proofread and complete it, then remove this comment. -->
 # gemma2b-summarize-gemini1_5flash-64k
-This model is a fine-tuned version of [google/gemma-2b](https://huggingface.co/google/gemma-2b) on the llama-duo/synth_summarize_dataset_dedup dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.7185
 ## Model description
@@ -52,7 +52,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
-- num_epochs: 15
 ### Training results
@@ -71,8 +71,6 @@ The following hyperparameters were used during training:
 | 0.9208        | 10.9905 | 577  | 2.7079          |
 | 0.9195        | 12.0    | 630  | 2.7148          |
 | 0.9212        | 12.9905 | 682  | 2.7154          |
-| 0.9136        | 14.0    | 735  | 2.7181          |
-| 0.9103        | 14.8571 | 780  | 2.7185          |
 ### Framework versions

 license: gemma
 library_name: peft
 tags:
 - trl
 - sft
+- alignment-handbook
 - generated_from_trainer
 base_model: google/gemma-2b
 datasets:
+- generator
 model-index:
 - name: gemma2b-summarize-gemini1_5flash-64k
   results: []
 # gemma2b-summarize-gemini1_5flash-64k
+This model is a fine-tuned version of [google/gemma-2b](https://huggingface.co/google/gemma-2b) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.7154
 ## Model description
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 10
 ### Training results
 | 0.9208        | 10.9905 | 577  | 2.7079          |
 | 0.9195        | 12.0    | 630  | 2.7148          |
 | 0.9212        | 12.9905 | 682  | 2.7154          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -21,11 +21,11 @@
   "revision": null,
   "target_modules": [
     "k_proj",
-    "down_proj",
     "q_proj",
-    "o_proj",
-    "v_proj",
     "gate_proj",
     "up_proj"
   ],
   "task_type": "CAUSAL_LM",

   "revision": null,
   "target_modules": [
     "k_proj",
     "q_proj",
     "gate_proj",
+    "down_proj",
+    "v_proj",
+    "o_proj",
     "up_proj"
   ],
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ae6086e7cdaa1c7742eb4042577d161b6afc3838df568f2b317918f4e82a95d
 size 39256960

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c2747846863e52a0aaf9d156c39e150d13213de860c580c3e4dd71702df720c
 size 39256960

all_results.json CHANGED Viewed

@@ -1,14 +1,14 @@
 {
-    "epoch": 14.857142857142858,
     "eval_loss": 2.71852707862854,
     "eval_runtime": 0.4953,
     "eval_samples": 25,
     "eval_samples_per_second": 42.399,
     "eval_steps_per_second": 2.019,
-    "total_flos": 1.2277516310308454e+18,
-    "train_loss": 1.077159938445458,
-    "train_runtime": 4175.1629,
     "train_samples": 63353,
-    "train_samples_per_second": 47.883,
-    "train_steps_per_second": 0.187
 }

 {
+    "epoch": 13.333333333333334,
     "eval_loss": 2.71852707862854,
     "eval_runtime": 0.4953,
     "eval_samples": 25,
     "eval_samples_per_second": 42.399,
     "eval_steps_per_second": 2.019,
+    "total_flos": 1.1018283868225536e+18,
+    "train_loss": 0.0,
+    "train_runtime": 3.5395,
     "train_samples": 63353,
+    "train_samples_per_second": 37654.826,
+    "train_steps_per_second": 146.913
 }

runs/Jun10_04-56-56_48ddfe8e991f/events.out.tfevents.1717995436.48ddfe8e991f.131950.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e19825ff88c6f61e8a91bd83bb45388c64edd43954ece8b9a928d99eeafb6a2
+size 5959

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-    "epoch": 14.857142857142858,
-    "total_flos": 1.2277516310308454e+18,
-    "train_loss": 1.077159938445458,
-    "train_runtime": 4175.1629,
     "train_samples": 63353,
-    "train_samples_per_second": 47.883,
-    "train_steps_per_second": 0.187
 }

 {
+    "epoch": 13.333333333333334,
+    "total_flos": 1.1018283868225536e+18,
+    "train_loss": 0.0,
+    "train_runtime": 3.5395,
     "train_samples": 63353,
+    "train_samples_per_second": 37654.826,
+    "train_steps_per_second": 146.913
 }

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 14.857142857142858,
   "eval_steps": 500,
-  "global_step": 780,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1100,147 +1100,19 @@
       "step": 700
     },
     {
-      "epoch": 13.428571428571429,
-      "grad_norm": 0.23828125,
-      "learning_rate": 5.580037533961546e-06,
-      "loss": 0.9212,
-      "step": 705
-    },
-    {
-      "epoch": 13.523809523809524,
-      "grad_norm": 0.2373046875,
-      "learning_rate": 4.866728191731829e-06,
-      "loss": 0.909,
-      "step": 710
-    },
-    {
-      "epoch": 13.619047619047619,
-      "grad_norm": 0.2431640625,
-      "learning_rate": 4.20104876845111e-06,
-      "loss": 0.9146,
-      "step": 715
-    },
-    {
-      "epoch": 13.714285714285714,
-      "grad_norm": 0.2431640625,
-      "learning_rate": 3.5833325466437694e-06,
-      "loss": 0.9107,
-      "step": 720
-    },
-    {
-      "epoch": 13.80952380952381,
-      "grad_norm": 0.2451171875,
-      "learning_rate": 3.013888795328057e-06,
-      "loss": 0.9136,
-      "step": 725
-    },
-    {
-      "epoch": 13.904761904761905,
-      "grad_norm": 0.236328125,
-      "learning_rate": 2.4930026151759766e-06,
-      "loss": 0.9147,
-      "step": 730
-    },
-    {
-      "epoch": 14.0,
-      "grad_norm": 0.2470703125,
-      "learning_rate": 2.0209347957732328e-06,
-      "loss": 0.9136,
-      "step": 735
-    },
-    {
-      "epoch": 14.0,
-      "eval_loss": 2.7180798053741455,
-      "eval_runtime": 0.4838,
-      "eval_samples_per_second": 43.41,
-      "eval_steps_per_second": 2.067,
-      "step": 735
-    },
-    {
-      "epoch": 14.095238095238095,
-      "grad_norm": 0.23828125,
-      "learning_rate": 1.5979216850509848e-06,
-      "loss": 0.9092,
-      "step": 740
-    },
-    {
-      "epoch": 14.19047619047619,
-      "grad_norm": 0.2578125,
-      "learning_rate": 1.2241750709546917e-06,
-      "loss": 0.9159,
-      "step": 745
-    },
-    {
-      "epoch": 14.285714285714286,
-      "grad_norm": 0.236328125,
-      "learning_rate": 8.998820754091531e-07,
-      "loss": 0.9153,
-      "step": 750
-    },
-    {
-      "epoch": 14.380952380952381,
-      "grad_norm": 0.244140625,
-      "learning_rate": 6.25205060633205e-07,
-      "loss": 0.9114,
-      "step": 755
-    },
-    {
-      "epoch": 14.476190476190476,
-      "grad_norm": 0.244140625,
-      "learning_rate": 4.0028154785050063e-07,
-      "loss": 0.9163,
-      "step": 760
-    },
-    {
-      "epoch": 14.571428571428571,
-      "grad_norm": 0.236328125,
-      "learning_rate": 2.2522414843748618e-07,
-      "loss": 0.9153,
-      "step": 765
-    },
-    {
-      "epoch": 14.666666666666666,
-      "grad_norm": 0.23828125,
-      "learning_rate": 1.0012050754277802e-07,
-      "loss": 0.912,
-      "step": 770
-    },
-    {
-      "epoch": 14.761904761904763,
-      "grad_norm": 0.25,
-      "learning_rate": 2.5033260206275277e-08,
-      "loss": 0.9155,
-      "step": 775
-    },
-    {
-      "epoch": 14.857142857142858,
-      "grad_norm": 0.240234375,
-      "learning_rate": 0.0,
-      "loss": 0.9103,
-      "step": 780
-    },
-    {
-      "epoch": 14.857142857142858,
-      "eval_loss": 2.71852707862854,
-      "eval_runtime": 0.4854,
-      "eval_samples_per_second": 43.26,
-      "eval_steps_per_second": 2.06,
-      "step": 780
-    },
-    {
-      "epoch": 14.857142857142858,
-      "step": 780,
-      "total_flos": 1.2277516310308454e+18,
-      "train_loss": 1.077159938445458,
-      "train_runtime": 4175.1629,
-      "train_samples_per_second": 47.883,
-      "train_steps_per_second": 0.187
     }
   ],
   "logging_steps": 5,
-  "max_steps": 780,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 15,
   "save_steps": 100,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -1254,7 +1126,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2277516310308454e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 13.333333333333334,
   "eval_steps": 500,
+  "global_step": 700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "step": 700
     },
     {
+      "epoch": 13.333333333333334,
+      "step": 700,
+      "total_flos": 1.1018283868225536e+18,
+      "train_loss": 0.0,
+      "train_runtime": 3.5395,
+      "train_samples_per_second": 37654.826,
+      "train_steps_per_second": 146.913
     }
   ],
   "logging_steps": 5,
+  "max_steps": 520,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
   "save_steps": 100,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.1018283868225536e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1ffb27f0fd6a7143dd5c1775871c7ef4697a68201a714b4e32514ace64902a5
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:864c40eca7ce2f5228b456390efab4e31903bcf7dac802808ea32a49d0bbfa72
 size 5304