Upload configuration_decicoder.py with huggingface_hub

#2 by itay-levy - opened
Files changed (1)
  1. configuration_decicoder.py +45 -0
configuration_decicoder.py ADDED
@@ -0,0 +1,45 @@
+ from transformers.models.llama.configuration_llama import LlamaConfig
+ from transformers.utils import logging
+
+
+ logger = logging.get_logger(__name__)
+
+ # Empty: this module defines no pretrained config archive map.
+ LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
+
+
+ class DeciCoderConfig(LlamaConfig):
+     r"""
+     This is the configuration class to store the configuration of a DeciCoder model. It inherits from
+     [`LlamaConfig`] and is used to instantiate a DeciCoder model according to the specified arguments,
+     defining the model architecture. Instantiating a configuration with the defaults will yield a
+     configuration similar to that of the LLaMA-7B.
+
+     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+     documentation from [`PretrainedConfig`] for more information.
+
+     Args:
+         naive_attention_prefill (`bool`, *optional*, defaults to `False`):
+             Whether to use naive matmul instead of scaled dot-product attention during prefill.
+         naive_attention_decode_batched (`bool`, *optional*, defaults to `True`):
+             Whether to use naive matmul instead of scaled dot-product attention during decode when batch_size > 1.
+         naive_attention_decode_single (`bool`, *optional*, defaults to `False`):
+             Whether to use naive matmul instead of scaled dot-product attention during decode when batch_size == 1.
+     """
+
+     model_type = "llama"
+     keys_to_ignore_at_inference = ["past_key_values"]
+
+     def __init__(
+         self,
+         naive_attention_prefill: bool = False,
+         naive_attention_decode_batched: bool = True,
+         naive_attention_decode_single: bool = False,
+         **kwargs,
+     ):
+         self.naive_attention_prefill = naive_attention_prefill
+         self.naive_attention_decode_batched = naive_attention_decode_batched
+         self.naive_attention_decode_single = naive_attention_decode_single
+
+         super().__init__(**kwargs)
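
For reviewers: a minimal, hypothetical usage sketch of the class added in this PR. It assumes configuration_decicoder.py is importable locally; the hidden_size and num_attention_heads values and the Hub repo id in the comment are illustrative, not taken from this PR.

from configuration_decicoder import DeciCoderConfig

# Any LlamaConfig field (hidden_size, num_attention_heads, ...) passes
# through **kwargs to the parent constructor.
config = DeciCoderConfig(
    naive_attention_prefill=False,
    naive_attention_decode_batched=True,
    naive_attention_decode_single=False,
    hidden_size=2048,
    num_attention_heads=32,
)
print(config.model_type)                      # "llama"
print(config.naive_attention_decode_batched)  # True

# From the Hub, a custom config class like this one is resolved with
# trust_remote_code=True (assuming the repo's config.json maps AutoConfig
# to this module):
# from transformers import AutoConfig
# config = AutoConfig.from_pretrained("Deci/DeciCoder-1b", trust_remote_code=True)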