Upload config

Browse files
- config.json (+5 / -5)
- configuration_internlm2.py (+5 / -1)
config.json CHANGED

@@ -1,13 +1,12 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "internlm/internlm2-7b-reward",
   "architectures": [
-    "
+    "InternLM2ForRewardModel"
   ],
   "attn_implementation": "eager",
   "auto_map": {
     "AutoConfig": "configuration_internlm2.InternLM2Config",
-    "AutoModel": "
-    "AutoModelForCausalLM": "eth-dl-rewards/internlm2-7b-mod--modeling_internlm2.InternLM2ForCausalLM"
+    "AutoModel": "internlm/internlm2-7b-reward--modeling_internlm2.InternLM2ForRewardModel"
   },
   "bias": false,
   "bos_token_id": 1,
@@ -23,11 +22,12 @@
   "num_key_value_heads": 8,
   "pad_token_id": 2,
   "pretraining_tp": 1,
+  "reward_token_id": 92527,
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
   "rope_theta": 1000000,
   "tie_word_embeddings": false,
-  "torch_dtype": "
+  "torch_dtype": "float16",
   "transformers_version": "4.47.1",
   "use_cache": true,
   "vocab_size": 92544
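The updated auto_map now routes AutoModel to the remote InternLM2ForRewardModel class, so loading the checkpoint with trust_remote_code=True resolves to the reward head rather than a causal LM. A minimal loading sketch, not part of this commit, assuming the referenced modeling code ships in the internlm/internlm2-7b-reward repository:

import torch
from transformers import AutoModel, AutoTokenizer

# trust_remote_code=True lets Transformers follow the auto_map entry
# "internlm/internlm2-7b-reward--modeling_internlm2.InternLM2ForRewardModel".
model = AutoModel.from_pretrained(
    "internlm/internlm2-7b-reward",
    torch_dtype=torch.float16,  # matches the new "torch_dtype": "float16"
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    "internlm/internlm2-7b-reward", trust_remote_code=True
)
print(type(model).__name__)  # expected: InternLM2ForRewardModel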
configuration_internlm2.py CHANGED

@@ -90,6 +90,8 @@ class InternLM2Config(PretrainedConfig):
             these scaling strategies behave:
             https://www.reddit.com/r/LocalLLaMA/comments/14mrgpr/dynamically_scaled_rope_further_increases/. This is an
             experimental feature, subject to breaking API changes in future versions.
+        reward_token_id (`int`, *optional*, defaults to 92527):
+            Token id used to calculate the reward score.
     """
     _auto_class = "AutoConfig"
     model_type = "internlm2"
@@ -117,6 +119,7 @@ class InternLM2Config(PretrainedConfig):
         rope_theta=10000,
         rope_scaling=None,
         attn_implementation=None,
+        reward_token_id=92527,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -142,6 +145,7 @@ class InternLM2Config(PretrainedConfig):
         self.attn_implementation = attn_implementation
         if self.attn_implementation is None:
             self.attn_implementation = "eager"
+        self.reward_token_id = reward_token_id

         super().__init__(
             pad_token_id=pad_token_id,
@@ -177,4 +181,4 @@ class InternLM2Config(PretrainedConfig):
         raise ValueError(
             f"`rope_scaling`'s factor field must be a number >= 1, got {rope_scaling_factor} "
             f"of type {type(rope_scaling_factor)}"
-        )
+        )
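The new reward_token_id field (default 92527, also serialized into config.json above) gives the modeling code a configurable token whose position is used to read out the reward score. The sketch below is illustrative only and is not the repository's modeling code: it shows the field round-tripping through InternLM2Config, and the append_reward_token helper is hypothetical.

import torch

from configuration_internlm2 import InternLM2Config

config = InternLM2Config()              # assumes default construction works
assert config.reward_token_id == 92527  # new default added in this commit

def append_reward_token(input_ids: torch.Tensor, config: InternLM2Config) -> torch.Tensor:
    """Hypothetical helper: append the reward token so a head has a fixed position to score."""
    reward_col = torch.full(
        (input_ids.size(0), 1), config.reward_token_id, dtype=input_ids.dtype
    )
    return torch.cat([input_ids, reward_col], dim=-1)

batch = torch.tensor([[1, 5, 6, 7]])        # toy token ids
print(append_reward_token(batch, config))   # -> [[1, 5, 6, 7, 92527]]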