ccwu0918 committed
Commit 580a47e
1 Parent(s): c9ec4c9

Delete configuration_qwen.py

Files changed (1):
  configuration_qwen.py  +0 -65
configuration_qwen.py DELETED
@@ -1,65 +0,0 @@
-# Copyright (c) Alibaba Cloud.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-from transformers import PretrainedConfig
-
-
-class QWenConfig(PretrainedConfig):
-    model_type = "qwen"
-    keys_to_ignore_at_inference = ["past_key_values"]
-
-    def __init__(
-        self,
-        vocab_size=151936,
-        hidden_size=4096,
-        num_hidden_layers=32,
-        num_attention_heads=32,
-        emb_dropout_prob=0.0,
-        attn_dropout_prob=0.0,
-        layer_norm_epsilon=1e-6,
-        initializer_range=0.02,
-        max_position_embeddings=8192,
-        scale_attn_weights=True,
-        use_cache=True,
-        bf16=False,
-        fp16=False,
-        fp32=False,
-        kv_channels=128,
-        rotary_pct=1.0,
-        rotary_emb_base=10000,
-        use_dynamic_ntk=True,
-        use_logn_attn=True,
-        use_flash_attn="auto",
-        intermediate_size=22016,
-        no_bias=True,
-        tie_word_embeddings=False,
-        **kwargs,
-    ):
-        self.vocab_size = vocab_size
-        self.hidden_size = hidden_size
-        self.intermediate_size = intermediate_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-        self.emb_dropout_prob = emb_dropout_prob
-        self.attn_dropout_prob = attn_dropout_prob
-        self.layer_norm_epsilon = layer_norm_epsilon
-        self.initializer_range = initializer_range
-        self.scale_attn_weights = scale_attn_weights
-        self.use_cache = use_cache
-        self.max_position_embeddings = max_position_embeddings
-        self.bf16 = bf16
-        self.fp16 = fp16
-        self.fp32 = fp32
-        self.kv_channels = kv_channels
-        self.rotary_pct = rotary_pct
-        self.rotary_emb_base = rotary_emb_base
-        self.use_dynamic_ntk = use_dynamic_ntk
-        self.use_logn_attn = use_logn_attn
-        self.use_flash_attn = use_flash_attn
-        self.no_bias = no_bias
-        super().__init__(
-            tie_word_embeddings=tie_word_embeddings,
-            **kwargs
-        )
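For reference, a minimal usage sketch of the class removed in this commit. It assumes configuration_qwen.py is still importable locally (for example, from an earlier revision of this repository); the output directory name and the override values are illustrative, not part of the commit.

```python
# Minimal sketch: instantiating the (now deleted) QWenConfig.
# Assumes configuration_qwen.py from an earlier revision is on the import path.
from configuration_qwen import QWenConfig

# Defaults reproduce the layout defined in the file above.
config = QWenConfig()
print(config.hidden_size)        # 4096
print(config.num_hidden_layers)  # 32

# Like any PretrainedConfig subclass, overrides are passed as keyword
# arguments and the result can be serialized to config.json.
small = QWenConfig(hidden_size=2048, num_hidden_layers=24)
small.save_pretrained("./qwen-small-config")  # hypothetical output path
```

Because QWenConfig subclasses transformers.PretrainedConfig, the saved config.json can later be reloaded with QWenConfig.from_pretrained (or via AutoConfig with trust_remote_code=True when the file ships inside a model repository).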