Upload configuration_decicoder.py with huggingface_hub
Browse files- configuration_decicoder.py +51 -0
configuration_decicoder.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from packaging import version
import transformers

# Check the installed transformers version before importing anything else
# from the package, so that an unsupported install fails with an actionable
# message. This is why the remaining transformers imports sit below the check
# instead of at the very top of the module.
if version.parse(transformers.__version__) < version.parse("4.31.0"):
    raise ImportError(
        f"You are using transformers=={transformers.__version__}, "
        "but transformers>=4.31.0 is required to use DeciCoder. "
        "Please upgrade transformers."
    )

from transformers.models.llama.configuration_llama import LlamaConfig
from transformers.utils import logging


# Module-level logger obtained through the transformers logging utilities.
logger = logging.get_logger(__name__)

# Left empty here; nothing in the visible part of this module reads it.
LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
|
14 |
+
|
15 |
+
|
16 |
+
class DeciCoderConfig(LlamaConfig):
    r"""
    Configuration class for DeciCoder, implemented as a thin extension of [`LlamaConfig`].

    On top of everything [`LlamaConfig`] accepts (forwarded unchanged through `**kwargs`), this class stores three
    boolean switches that choose between naive matmul attention and scaled dot product attention for the prefill
    phase and for the two decode cases (batched and single-sequence).

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    NOTE(review): this class only stores the flags; the code that consumes them is not part of this file. Going by
    the flag names, `True` presumably selects the naive matmul path -- confirm against the modeling code.

    Args:
        naive_attention_prefill (`bool`, *optional*, defaults to `False`):
            Whether to use naive matmul or scaled dot product attention during prefill.
        naive_attention_decode_batched (`bool`, *optional*, defaults to `True`):
            Whether to use naive matmul or scaled dot product attention during decode for batch_size > 1.
        naive_attention_decode_single (`bool`, *optional*, defaults to `False`):
            Whether to use naive matmul or scaled dot product attention during decode for batch_size == 1.
        **kwargs:
            Any remaining keyword arguments, passed as-is to `LlamaConfig.__init__`.
    """

    # The model type identifier is the same "llama" string, not a DeciCoder-specific one.
    model_type = "llama"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        naive_attention_prefill: bool = False,
        naive_attention_decode_batched: bool = True,
        naive_attention_decode_single: bool = False,
        **kwargs,
    ) -> None:
        # Record the DeciCoder-specific attention switches first, then let the
        # parent initializer handle every other configuration argument.
        self.naive_attention_prefill = naive_attention_prefill
        self.naive_attention_decode_batched = naive_attention_decode_batched
        self.naive_attention_decode_single = naive_attention_decode_single

        super().__init__(**kwargs)
|
51 |
+
|