Model save

Browse files

Files changed (11) hide show

README.md +5 -8
all_results.json +5 -5
config.json +1 -1
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
runs/Jun14_18-07-43_action-graph-trainer/events.out.tfevents.1718388492.action-graph-trainer.695665.0 +3 -0
train_results.json +5 -5
trainer_state.json +25 -25
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -2,15 +2,12 @@
 license: apache-2.0
 base_model: Qwen/Qwen2-7B
 tags:
-- alignment-handbook
-- trl
-- sft
-- generated_from_trainer
 - trl
 - sft
 - generated_from_trainer
 datasets:
-- HuggingFaceH4/ultrachat_200k
 model-index:
 - name: zephyr-qwen2-7b-sft
   results: []
@@ -21,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 # zephyr-qwen2-7b-sft
-This model is a fine-tuned version of [Qwen/Qwen2-7B](https://huggingface.co/Qwen/Qwen2-7B) on the HuggingFaceH4/ultrachat_200k dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.0646
 ## Model description
@@ -60,7 +57,7 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.0626        | 1.0   | 956  | 1.0646          |
 ### Framework versions

 license: apache-2.0
 base_model: Qwen/Qwen2-7B
 tags:
 - trl
 - sft
+- alignment-handbook
 - generated_from_trainer
 datasets:
+- generator
 model-index:
 - name: zephyr-qwen2-7b-sft
   results: []
 # zephyr-qwen2-7b-sft
+This model is a fine-tuned version of [Qwen/Qwen2-7B](https://huggingface.co/Qwen/Qwen2-7B) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.0645
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 1.0627        | 1.0   | 956  | 1.0645          |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -5,10 +5,10 @@
     "eval_samples": 23109,
     "eval_samples_per_second": 47.288,
     "eval_steps_per_second": 0.74,
-    "total_flos": 500925122740224.0,
-    "train_loss": 1.0766646904426638,
-    "train_runtime": 10839.8443,
     "train_samples": 207864,
-    "train_samples_per_second": 11.286,
-    "train_steps_per_second": 0.088
 }

     "eval_samples": 23109,
     "eval_samples_per_second": 47.288,
     "eval_steps_per_second": 0.74,
+    "total_flos": 500662995517440.0,
+    "train_loss": 0.06220405869902926,
+    "train_runtime": 877.8841,
     "train_samples": 207864,
+    "train_samples_per_second": 139.358,
+    "train_steps_per_second": 1.089
 }

config.json CHANGED Viewed

@@ -22,7 +22,7 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.40.2",
-  "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 152064
 }

   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.40.2",
+  "use_cache": false,
   "use_sliding_window": false,
   "vocab_size": 152064
 }

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f6039494ac641d99d13b6f028d2a309d117292be0301ad9e77ccb61d5d09d61
 size 4877660776

 version https://git-lfs.github.com/spec/v1
+oid sha256:5346f7673f73f551aaaa605516577660e1eeedcc29154ded68a8a39e1bf72c4c
 size 4877660776

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7907147daaa9fad1b0dd1ccd01bf743294c785fed19b77a288ef3e6cbe91768a
 size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:b42de9be5847138b7967316f3dad2efd4db71d2dc2042256575769e7883a189a
 size 4932751008

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bea1812025c1808c2ebc632f73cad6038809c45373791f1789cf32259447d9d4
 size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:90e5f201a3835937500f20e67d4c791a7124c5a060229fc1f92b17ebf3fda4b2
 size 4330865200

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3f1bf1dbca8a6fa54c435b5dee8ee34b090850ba9b780c8d6f3083db9d29355
 size 1089994880

 version https://git-lfs.github.com/spec/v1
+oid sha256:634e5128c71a098d1f1cad9837cc743ae34a83706e15c9cb9df0ac5d7fc76820
 size 1089994880

runs/Jun14_18-07-43_action-graph-trainer/events.out.tfevents.1718388492.action-graph-trainer.695665.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c86dcc545bc64c7e85477584418e0651f4252ace1b3894df3f4e8f5359dddf47
+size 7807

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 1.0,
-    "total_flos": 500925122740224.0,
-    "train_loss": 1.0766646904426638,
-    "train_runtime": 10839.8443,
     "train_samples": 207864,
-    "train_samples_per_second": 11.286,
-    "train_steps_per_second": 0.088
 }

 {
     "epoch": 1.0,
+    "total_flos": 500662995517440.0,
+    "train_loss": 0.06220405869902926,
+    "train_runtime": 877.8841,
     "train_samples": 207864,
+    "train_samples_per_second": 139.358,
+    "train_steps_per_second": 1.089
 }

trainer_state.json CHANGED Viewed

@@ -1277,97 +1277,97 @@
     },
     {
       "epoch": 0.946652719665272,
-      "grad_norm": 0.48247289512404495,
       "learning_rate": 1.730440504639408e-07,
       "loss": 1.058,
       "step": 905
     },
     {
       "epoch": 0.9518828451882845,
-      "grad_norm": 0.48826128008832265,
       "learning_rate": 1.408530770781813e-07,
       "loss": 1.0526,
       "step": 910
     },
     {
       "epoch": 0.9571129707112971,
-      "grad_norm": 0.4969532853804826,
       "learning_rate": 1.1195115097079268e-07,
-      "loss": 1.0685,
       "step": 915
     },
     {
       "epoch": 0.9623430962343096,
-      "grad_norm": 0.48920579432423383,
       "learning_rate": 8.634791392946429e-08,
       "loss": 1.0676,
       "step": 920
     },
     {
       "epoch": 0.9675732217573222,
-      "grad_norm": 0.49070804064035406,
       "learning_rate": 6.405190728721033e-08,
       "loss": 1.0455,
       "step": 925
     },
     {
       "epoch": 0.9728033472803347,
-      "grad_norm": 0.47721070672447036,
       "learning_rate": 4.5070569072952485e-08,
-      "loss": 1.061,
       "step": 930
     },
     {
       "epoch": 0.9780334728033473,
-      "grad_norm": 0.4960618241854113,
       "learning_rate": 2.9410231530168087e-08,
-      "loss": 1.0499,
       "step": 935
     },
     {
       "epoch": 0.9832635983263598,
-      "grad_norm": 0.4727473862892527,
       "learning_rate": 1.7076119004429958e-08,
       "loss": 1.0763,
       "step": 940
     },
     {
       "epoch": 0.9884937238493724,
-      "grad_norm": 0.4932911494908516,
       "learning_rate": 8.072346200544979e-09,
       "loss": 1.0672,
       "step": 945
     },
     {
       "epoch": 0.9937238493723849,
-      "grad_norm": 0.48937555835484964,
       "learning_rate": 2.401916809872118e-09,
       "loss": 1.0629,
       "step": 950
     },
     {
       "epoch": 0.9989539748953975,
-      "grad_norm": 0.47678060880513334,
       "learning_rate": 6.672250828620996e-11,
-      "loss": 1.0626,
       "step": 955
     },
     {
       "epoch": 1.0,
-      "eval_loss": 1.0645579099655151,
-      "eval_runtime": 287.2685,
-      "eval_samples_per_second": 47.13,
-      "eval_steps_per_second": 0.738,
       "step": 956
     },
     {
       "epoch": 1.0,
       "step": 956,
-      "total_flos": 500925122740224.0,
-      "train_loss": 1.0766646904426638,
-      "train_runtime": 10839.8443,
-      "train_samples_per_second": 11.286,
-      "train_steps_per_second": 0.088
     }
   ],
   "logging_steps": 5,
@@ -1375,7 +1375,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
-  "total_flos": 500925122740224.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

     },
     {
       "epoch": 0.946652719665272,
+      "grad_norm": 0.4832426848529184,
       "learning_rate": 1.730440504639408e-07,
       "loss": 1.058,
       "step": 905
     },
     {
       "epoch": 0.9518828451882845,
+      "grad_norm": 0.48799198233407015,
       "learning_rate": 1.408530770781813e-07,
       "loss": 1.0526,
       "step": 910
     },
     {
       "epoch": 0.9571129707112971,
+      "grad_norm": 0.4969562812336113,
       "learning_rate": 1.1195115097079268e-07,
+      "loss": 1.0684,
       "step": 915
     },
     {
       "epoch": 0.9623430962343096,
+      "grad_norm": 0.4892866844912397,
       "learning_rate": 8.634791392946429e-08,
       "loss": 1.0676,
       "step": 920
     },
     {
       "epoch": 0.9675732217573222,
+      "grad_norm": 0.49055262840153824,
       "learning_rate": 6.405190728721033e-08,
       "loss": 1.0455,
       "step": 925
     },
     {
       "epoch": 0.9728033472803347,
+      "grad_norm": 0.47689555635255854,
       "learning_rate": 4.5070569072952485e-08,
+      "loss": 1.0609,
       "step": 930
     },
     {
       "epoch": 0.9780334728033473,
+      "grad_norm": 0.4955325802322405,
       "learning_rate": 2.9410231530168087e-08,
+      "loss": 1.0498,
       "step": 935
     },
     {
       "epoch": 0.9832635983263598,
+      "grad_norm": 0.47274550178714503,
       "learning_rate": 1.7076119004429958e-08,
       "loss": 1.0763,
       "step": 940
     },
     {
       "epoch": 0.9884937238493724,
+      "grad_norm": 0.49350697124044746,
       "learning_rate": 8.072346200544979e-09,
       "loss": 1.0672,
       "step": 945
     },
     {
       "epoch": 0.9937238493723849,
+      "grad_norm": 0.4887853656062252,
       "learning_rate": 2.401916809872118e-09,
       "loss": 1.0629,
       "step": 950
     },
     {
       "epoch": 0.9989539748953975,
+      "grad_norm": 0.4764956168422736,
       "learning_rate": 6.672250828620996e-11,
+      "loss": 1.0627,
       "step": 955
     },
     {
       "epoch": 1.0,
+      "eval_loss": 1.0645456314086914,
+      "eval_runtime": 285.3875,
+      "eval_samples_per_second": 47.441,
+      "eval_steps_per_second": 0.743,
       "step": 956
     },
     {
       "epoch": 1.0,
       "step": 956,
+      "total_flos": 500662995517440.0,
+      "train_loss": 0.06220405869902926,
+      "train_runtime": 877.8841,
+      "train_samples_per_second": 139.358,
+      "train_steps_per_second": 1.089
     }
   ],
   "logging_steps": 5,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
+  "total_flos": 500662995517440.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:639c1527f60111741882e196954883997aec4a9ccba7483a65a28fc1a4187617
 size 6200

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a961b2dc9433696ebda3812a203e258f2b437c5b7c7dd434f0409fcfebe52fe
 size 6200