DarshanDeshpande committed
Commit 7223345 (1 parent: 3bdbd19)

gemma_2b_social_reasoning_reward_model

README.md CHANGED
@@ -9,19 +9,19 @@ base_model: google/gemma-2b
  metrics:
  - accuracy
  model-index:
- - name: reward_model
+ - name: gemma_2b_social_reasoning_reward_model
    results: []
  ---

  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
  should probably proofread and complete it, then remove this comment. -->

- # reward_model
+ # gemma_2b_social_reasoning_reward_model

  This model is a fine-tuned version of [google/gemma-2b](https://huggingface.co/google/gemma-2b) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.6165
- - Accuracy: 0.6706
+ - Loss: 0.6045
+ - Accuracy: 0.6818

  ## Model description

@@ -41,13 +41,13 @@ More information needed

  The following hyperparameters were used during training:
  - learning_rate: 0.0005
- - train_batch_size: 32
- - eval_batch_size: 32
+ - train_batch_size: 16
+ - eval_batch_size: 16
  - seed: 42
  - gradient_accumulation_steps: 4
- - total_train_batch_size: 128
+ - total_train_batch_size: 64
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- - lr_scheduler_type: linear
+ - lr_scheduler_type: cosine
  - lr_scheduler_warmup_steps: 10
  - num_epochs: 3

@@ -55,7 +55,18 @@ The following hyperparameters were used during training:

  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
- | 0.5172        | 1.67  | 30   | 0.6530          | 0.6391   |
+ | 0.8047        | 0.24  | 10   | 0.7041          | 0.5447   |
+ | 0.6758        | 0.48  | 20   | 0.6179          | 0.6725   |
+ | 0.6436        | 0.72  | 30   | 0.6066          | 0.6690   |
+ | 0.6423        | 0.96  | 40   | 0.6053          | 0.6813   |
+ | 0.5838        | 1.2   | 50   | 0.6080          | 0.6637   |
+ | 0.5871        | 1.44  | 60   | 0.6301          | 0.6480   |
+ | 0.5928        | 1.68  | 70   | 0.6338          | 0.6515   |
+ | 0.5777        | 1.92  | 80   | 0.6198          | 0.6375   |
+ | 0.5378        | 2.16  | 90   | 0.6225          | 0.6392   |
+ | 0.5163        | 2.4   | 100  | 0.6222          | 0.6532   |
+ | 0.4963        | 2.63  | 110  | 0.6224          | 0.6567   |
+ | 0.5176        | 2.87  | 120  | 0.6229          | 0.6550   |

  ### Framework versions
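
The hyperparameter changes above (halved per-device batch size, cosine instead of linear decay) map onto a `transformers.TrainingArguments` roughly as follows. This is a minimal sketch assuming a single-GPU run with the standard `Trainer` API; the output directory is an assumption, since the commit only shows the values listed in the model card:

```python
from transformers import TrainingArguments

# Sketch of the training configuration in the updated model card.
# Only the values listed in the README come from the commit; everything
# else (e.g. output_dir) is an illustrative assumption.
training_args = TrainingArguments(
    output_dir="gemma_2b_social_reasoning_reward_model",  # assumed
    learning_rate=5e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=4,  # effective batch: 16 * 4 = 64 on one device
    lr_scheduler_type="cosine",     # was "linear" before this commit
    warmup_steps=10,
    num_train_epochs=3,
    seed=42,
    # Adam betas (0.9, 0.999) and epsilon 1e-08 are the transformers defaults.
)
```

Note that the effective batch size drops from 32 * 4 = 128 to 16 * 4 = 64, which roughly doubles the optimizer steps per epoch and explains why the new training table runs to step 120 where the old one stopped at step 30.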
adapter_config.json CHANGED
@@ -15,7 +15,7 @@
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
- "r": 16,
+ "r": 32,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:885e4dc9e4914b57e04cf8a53fe6a6a3a8cf7acc903e1f2c29baf16e425ff6a3
- size 7382336
+ oid sha256:6d38f5803bc03fe4650091c23e4cb0d5cbad8ac7dd2ec4f73d350c5281d1704a
+ size 14763488
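
The adapter change doubles the LoRA rank from 16 to 32. A minimal sketch of the corresponding PEFT configuration; `target_modules` is truncated in the diff, so the module list and the other values here are illustrative assumptions:

```python
from peft import LoraConfig

# Sketch of the updated adapter configuration. Only r=32 is confirmed by
# the diff; the remaining fields are assumptions for illustration.
lora_config = LoraConfig(
    r=32,           # raised from 16 in this commit
    lora_alpha=32,  # assumption: not visible in the diff
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # assumption
    task_type="SEQ_CLS",  # assumption: scalar-reward models usually use a classification head
)
```

Because LoRA parameter count scales linearly with the rank, doubling `r` roughly doubles the adapter checkpoint, which matches the safetensors change above: 7,382,336 to 14,763,488 bytes.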
special_tokens_map.json CHANGED
@@ -13,13 +13,7 @@
  "rstrip": false,
  "single_word": false
  },
- "pad_token": {
- "content": "<pad>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
+ "pad_token": "<eos>",
  "unk_token": {
  "content": "<unk>",
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -40,7 +40,7 @@
  "eos_token": "<eos>",
  "legacy": null,
  "model_max_length": 1000000000000000019884624838656,
- "pad_token": "<pad>",
+ "pad_token": "<eos>",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "GemmaTokenizer",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a1ad659a7c27a1bd5a7e8340c70b7820198768eac7e89fea17c23138c078f482
- size 4856
+ oid sha256:238ed77d8152efed5f0fea1c0c4f62ae52975da6e0ad69f6d803dbce7985c7fa
+ size 4920
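
Finally, a sketch of how an adapter checkpoint like this one could be loaded for scoring. The repo id and the single-logit classification head are assumptions; the commit itself does not show how the model is meant to be consumed:

```python
import torch
from peft import AutoPeftModelForSequenceClassification
from transformers import AutoTokenizer

# Hypothetical repo id; substitute the adapter's actual Hub path.
repo_id = "DarshanDeshpande/gemma_2b_social_reasoning_reward_model"

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
tokenizer.pad_token = tokenizer.eos_token

# num_labels=1 assumes a scalar reward head, the usual reward-model setup.
model = AutoPeftModelForSequenceClassification.from_pretrained(repo_id, num_labels=1)
model.eval()

inputs = tokenizer("Helping a stranger carry groceries is kind.", return_tensors="pt")
with torch.no_grad():
    reward = model(**inputs).logits[0, 0].item()
print(f"reward: {reward:.4f}")
```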