Model save

Files changed (8) hide show

README.md CHANGED Viewed

@@ -2,13 +2,13 @@
 license: apache-2.0
 library_name: peft
 tags:
-- alignment-handbook
 - trl
 - sft
 - generated_from_trainer
 base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
 datasets:
-- HuggingFaceH4/ultrachat_200k
 model-index:
 - name: tinyllama-1.1b-sft-qlora
   results: []
@@ -19,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
 # tinyllama-1.1b-sft-qlora
-This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on the HuggingFaceH4/ultrachat_200k dataset.
 It achieves the following results on the evaluation set:
 - Loss: 1.1695

 license: apache-2.0
 library_name: peft
 tags:
 - trl
 - sft
+- alignment-handbook
 - generated_from_trainer
 base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
 datasets:
+- generator
 model-index:
 - name: tinyllama-1.1b-sft-qlora
   results: []
 # tinyllama-1.1b-sft-qlora
+This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on the generator dataset.
 It achieves the following results on the evaluation set:
 - Loss: 1.1695

adapter_config.json CHANGED Viewed

@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
-    "down_proj",
-    "up_proj",
-    "q_proj",
     "k_proj",
     "gate_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
+    "down_proj",
+    "o_proj",
     "gate_proj",
+    "v_proj",
+    "q_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1cae67b7ab076ae66708e6a3b87058c0276d68ebda06229cf4c40e46d6bfc99
 size 25272360

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca974ae690d430e06acc45fa193b798d99f3cb3a6b25d15eab4da40c076a5b59
 size 25272360

all_results.json CHANGED Viewed

@@ -5,9 +5,9 @@
     "eval_samples": 23109,
     "eval_samples_per_second": 8.415,
     "eval_steps_per_second": 1.052,
-    "train_loss": 1.1836543822331609,
-    "train_runtime": 65654.1144,
     "train_samples": 207864,
-    "train_samples_per_second": 2.225,
-    "train_steps_per_second": 0.278
 }

     "eval_samples": 23109,
     "eval_samples_per_second": 8.415,
     "eval_steps_per_second": 1.052,
+    "train_loss": 0.0036345586762629653,
+    "train_runtime": 2327.0298,
     "train_samples": 207864,
+    "train_samples_per_second": 62.765,
+    "train_steps_per_second": 7.846
 }

runs/Apr23_17-34-48_poseidon/events.out.tfevents.1713893717.poseidon.710489.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:a35260fe95b73e90a918cc5dfb0c039239e0c878a6eb673c564e43c7b0c8d7c7
+size 8269

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 1.1836543822331609,
-    "train_runtime": 65654.1144,
     "train_samples": 207864,
-    "train_samples_per_second": 2.225,
-    "train_steps_per_second": 0.278
 }

 {
     "epoch": 1.0,
+    "train_loss": 0.0036345586762629653,
+    "train_runtime": 2327.0298,
     "train_samples": 207864,
+    "train_samples_per_second": 62.765,
+    "train_steps_per_second": 7.846
 }

trainer_state.json CHANGED Viewed

@@ -25574,20 +25574,20 @@
     },
     {
       "epoch": 1.0,
-      "eval_loss": 1.1695406436920166,
-      "eval_runtime": 1922.8575,
-      "eval_samples_per_second": 8.407,
-      "eval_steps_per_second": 1.051,
       "step": 18257
     },
     {
       "epoch": 1.0,
       "step": 18257,
       "total_flos": 1.8793056999854572e+18,
-      "train_loss": 1.1836543822331609,
-      "train_runtime": 65654.1144,
-      "train_samples_per_second": 2.225,
-      "train_steps_per_second": 0.278
     }
   ],
   "logging_steps": 5,

     },
     {
       "epoch": 1.0,
+      "eval_loss": 1.1695410013198853,
+      "eval_runtime": 2107.9295,
+      "eval_samples_per_second": 7.669,
+      "eval_steps_per_second": 0.959,
       "step": 18257
     },
     {
       "epoch": 1.0,
       "step": 18257,
       "total_flos": 1.8793056999854572e+18,
+      "train_loss": 0.0036345586762629653,
+      "train_runtime": 2327.0298,
+      "train_samples_per_second": 62.765,
+      "train_steps_per_second": 7.846
     }
   ],
   "logging_steps": 5,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a5fe3345ba6d55380440ee7b96cab2646f53dfd93da76be843e8a6bd8907a32
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:db31d5c3f77b1a494a8e65a7c73b64070762e72703debc87ca2ad95a5066139c
 size 5048