bglearning
committed on
Commit
·
1bd9d2a
1
Parent(s):
2aa5579
Set use_memory_efficient_attention to False and modify config.json of 2_Dense_8192
Browse files
- 2_Dense/config.json +1 -1
- 2_Dense/model.safetensors +2 -2
- 2_Dense/pytorch_model.bin +2 -2
- 2_Dense_8192/config.json +1 -1
- 2_Dense_8192/config_original.json +6 -0
- config.json +1 -38
2_Dense/config.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"in_features": 1024,
|
3 |
-
"out_features":
|
4 |
"bias": true,
|
5 |
"activation_function": "torch.nn.modules.linear.Identity"
|
6 |
}
|
|
|
1 |
{
|
2 |
"in_features": 1024,
|
3 |
+
"out_features": 1024,
|
4 |
"bias": true,
|
5 |
"activation_function": "torch.nn.modules.linear.Identity"
|
6 |
}
|
2_Dense/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caed226f6fc2317d1dde81540cbc2cc40055ab27dbf8e4377facfb281ac16fdd
|
3 |
+
size 4198592
|
2_Dense/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13e6f799c3a50eef2673ebd4b363e96787520bb6fe077104c4bc5aec5e0209b2
|
3 |
+
size 4199932
|
2_Dense_8192/config.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"in_features": 1024,
|
3 |
-
"out_features":
|
4 |
"bias": true,
|
5 |
"activation_function": "torch.nn.modules.linear.Identity"
|
6 |
}
|
|
|
1 |
{
|
2 |
"in_features": 1024,
|
3 |
+
"out_features": 1024,
|
4 |
"bias": true,
|
5 |
"activation_function": "torch.nn.modules.linear.Identity"
|
6 |
}
|
2_Dense_8192/config_original.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"in_features": 1024,
|
3 |
+
"out_features": 8192,
|
4 |
+
"bias": true,
|
5 |
+
"activation_function": "torch.nn.modules.linear.Identity"
|
6 |
+
}
|
config.json
CHANGED
@@ -1,38 +1 @@
|
|
1 |
-
{
|
2 |
-
"architectures": [
|
3 |
-
"NewModel"
|
4 |
-
],
|
5 |
-
"attention_probs_dropout_prob": 0.0,
|
6 |
-
"auto_map": {
|
7 |
-
"AutoConfig": "configuration.NewConfig",
|
8 |
-
"AutoModel": "modeling.NewModel"
|
9 |
-
},
|
10 |
-
"classifier_dropout": null,
|
11 |
-
"hidden_act": "gelu",
|
12 |
-
"hidden_dropout_prob": 0.1,
|
13 |
-
"hidden_size": 1024,
|
14 |
-
"initializer_range": 0.02,
|
15 |
-
"intermediate_size": 4096,
|
16 |
-
"layer_norm_eps": 1e-12,
|
17 |
-
"layer_norm_type": "layer_norm",
|
18 |
-
"logn_attention_clip1": false,
|
19 |
-
"logn_attention_scale": false,
|
20 |
-
"max_position_embeddings": 8192,
|
21 |
-
"model_type": "new",
|
22 |
-
"num_attention_heads": 16,
|
23 |
-
"num_hidden_layers": 24,
|
24 |
-
"pack_qkv": true,
|
25 |
-
"pad_token_id": 0,
|
26 |
-
"position_embedding_type": "rope",
|
27 |
-
"rope_scaling": {
|
28 |
-
"factor": 2.0,
|
29 |
-
"type": "ntk"
|
30 |
-
},
|
31 |
-
"rope_theta": 160000,
|
32 |
-
"torch_dtype": "float32",
|
33 |
-
"transformers_version": "4.41.2",
|
34 |
-
"type_vocab_size": 2,
|
35 |
-
"unpad_inputs": true,
|
36 |
-
"use_memory_efficient_attention": true,
|
37 |
-
"vocab_size": 30528
|
38 |
-
}
|
|
|
1 |
+
{"architectures": ["NewModel"], "attention_probs_dropout_prob": 0.0, "auto_map": {"AutoConfig": "configuration.NewConfig", "AutoModel": "modeling.NewModel"}, "classifier_dropout": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 1024, "initializer_range": 0.02, "intermediate_size": 4096, "layer_norm_eps": 1e-12, "layer_norm_type": "layer_norm", "logn_attention_clip1": false, "logn_attention_scale": false, "max_position_embeddings": 8192, "model_type": "new", "num_attention_heads": 16, "num_hidden_layers": 24, "pack_qkv": true, "pad_token_id": 0, "position_embedding_type": "rope", "rope_scaling": {"factor": 2.0, "type": "ntk"}, "rope_theta": 160000, "torch_dtype": "float32", "transformers_version": "4.41.2", "type_vocab_size": 2, "unpad_inputs": false, "use_memory_efficient_attention": false, "vocab_size": 30528}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|