NicholasCorrado/tinyllama-1.1b-chat-v1.0-hh-dpo

@@ -3,9 +3,15 @@ library_name: transformers
 license: apache-2.0
 base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
 tags:
 - trl
 - dpo
 - generated_from_trainer
 model-index:
 - name: tinyllama-1.1b-chat-v1.0-hh-dpo
   results: []
@@ -16,7 +22,17 @@ should probably proofread and complete it, then remove this comment. -->
 # tinyllama-1.1b-chat-v1.0-hh-dpo
-This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on an unknown dataset.
 ## Model description

 license: apache-2.0
 base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
 tags:
+- alignment-handbook
 - trl
 - dpo
 - generated_from_trainer
+- trl
+- dpo
+- generated_from_trainer
+datasets:
+- HuggingFaceH4/hh-rlhf-h4
 model-index:
 - name: tinyllama-1.1b-chat-v1.0-hh-dpo
   results: []
 # tinyllama-1.1b-chat-v1.0-hh-dpo
+This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on the HuggingFaceH4/hh-rlhf-h4 dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.6732
+- Rewards/chosen: -0.5129
+- Rewards/rejected: -0.5655
+- Rewards/accuracies: 0.5578
+- Rewards/margins: 0.0526
+- Logps/rejected: -214.5289
+- Logps/chosen: -211.9843
+- Logits/rejected: -2.4006
+- Logits/chosen: -2.4020
 ## Model description

all_results.json CHANGED Viewed

@@ -1,5 +1,18 @@
 {
     "epoch": 0.9996020692399522,
     "total_flos": 0.0,
     "train_loss": 0.678918091354856,
     "train_runtime": 7383.0158,

 {
     "epoch": 0.9996020692399522,
+    "eval_logits/chosen": -2.402043581008911,
+    "eval_logits/rejected": -2.4006083011627197,
+    "eval_logps/chosen": -211.9842987060547,
+    "eval_logps/rejected": -214.5289306640625,
+    "eval_loss": 0.673168957233429,
+    "eval_rewards/accuracies": 0.5578358173370361,
+    "eval_rewards/chosen": -0.5129190683364868,
+    "eval_rewards/margins": 0.052563026547431946,
+    "eval_rewards/rejected": -0.5654820799827576,
+    "eval_runtime": 167.0868,
+    "eval_samples": 8552,
+    "eval_samples_per_second": 51.183,
+    "eval_steps_per_second": 0.802,
     "total_flos": 0.0,
     "train_loss": 0.678918091354856,
     "train_runtime": 7383.0158,

config.json CHANGED Viewed

@@ -24,6 +24,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.44.1",
-  "use_cache": false,
   "vocab_size": 32000
 }

   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.44.1",
+  "use_cache": true,
   "vocab_size": 32000
 }

eval_results.json ADDED Viewed

+{
+    "epoch": 0.9996020692399522,
+    "eval_logits/chosen": -2.402043581008911,
+    "eval_logits/rejected": -2.4006083011627197,
+    "eval_logps/chosen": -211.9842987060547,
+    "eval_logps/rejected": -214.5289306640625,
+    "eval_loss": 0.673168957233429,
+    "eval_rewards/accuracies": 0.5578358173370361,
+    "eval_rewards/chosen": -0.5129190683364868,
+    "eval_rewards/margins": 0.052563026547431946,
+    "eval_rewards/rejected": -0.5654820799827576,
+    "eval_runtime": 167.0868,
+    "eval_samples": 8552,
+    "eval_samples_per_second": 51.183,
+    "eval_steps_per_second": 0.802
+}