xingyaoww
/

Qwen2.5-Coder-32B-Instruct-AWQ-128k

Text Generation

text-generation-inference

Inference Endpoints

4-bit precision

Model card Files Files and versions Community

xingyaoww committed on Nov 11, 2024

Commit

22f934f

·

1 Parent(s): 809193f

add rope scaling

Files changed (2) hide show

README.md +1 -0
config.json +7 -2

README.md CHANGED Viewed

@@ -15,6 +15,7 @@ tags:
 - qwen-coder
 ---
 # Qwen2.5-Coder-32B-Instruct-AWQ

 - qwen-coder
 ---
+**NOTE: This repo makes no changes to the original model; it only adds [extended context support](https://qwen.readthedocs.io/en/latest/deployment/vllm.html#extended-context-support) to `config.json`.**
 # Qwen2.5-Coder-32B-Instruct-AWQ

config.json CHANGED Viewed

@@ -1,4 +1,4 @@
-{
   "architectures": [
     "Qwen2ForCausalLM"
   ],
@@ -25,6 +25,11 @@
   },
   "rms_norm_eps": 1e-06,
   "rope_theta": 1000000.0,
   "sliding_window": 131072,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
@@ -32,4 +37,4 @@
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 152064
-}

   "architectures": [
     "Qwen2ForCausalLM"
   ],
   },
   "rms_norm_eps": 1e-06,
   "rope_theta": 1000000.0,
+  "rope_scaling": {
+    "factor": 4.0,
+    "original_max_position_embeddings": 32768,
+    "type": "yarn"
+  },
   "sliding_window": 131072,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 152064