Upload folder using huggingface_hub
Browse files
xDAN L2 MoE: xDAN-APUS4.0-MoE-Initial
- config.json +35 -0
- model-00001-of-00024.safetensors +3 -0
- model-00002-of-00024.safetensors +3 -0
- model-00003-of-00024.safetensors +3 -0
- model-00004-of-00024.safetensors +3 -0
- model-00005-of-00024.safetensors +3 -0
- model-00006-of-00024.safetensors +3 -0
- model-00007-of-00024.safetensors +3 -0
- model-00008-of-00024.safetensors +3 -0
- model-00009-of-00024.safetensors +3 -0
- model-00010-of-00024.safetensors +3 -0
- model-00011-of-00024.safetensors +3 -0
- model-00012-of-00024.safetensors +3 -0
- model-00013-of-00024.safetensors +3 -0
- model-00014-of-00024.safetensors +3 -0
- model-00015-of-00024.safetensors +3 -0
- model-00016-of-00024.safetensors +3 -0
- model-00017-of-00024.safetensors +3 -0
- model-00018-of-00024.safetensors +3 -0
- model-00019-of-00024.safetensors +3 -0
- model-00020-of-00024.safetensors +3 -0
- model-00021-of-00024.safetensors +3 -0
- model-00022-of-00024.safetensors +3 -0
- model-00023-of-00024.safetensors +3 -0
- model-00024-of-00024.safetensors +3 -0
- model.safetensors.index.json +0 -0
- special_tokens_map.json +24 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +40 -0
config.json
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "01-ai/Yi-34B-200K",
|
3 |
+
"architectures": [
|
4 |
+
"MixtralForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 7168,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 20480,
|
14 |
+
"max_position_embeddings": 200000,
|
15 |
+
"model_type": "mixtral",
|
16 |
+
"num_attention_heads": 56,
|
17 |
+
"num_experts_per_tok": 2,
|
18 |
+
"num_hidden_layers": 60,
|
19 |
+
"num_key_value_heads": 8,
|
20 |
+
"num_local_experts": 4,
|
21 |
+
"output_router_logits": false,
|
22 |
+
"pad_token_id": 0,
|
23 |
+
"pretraining_tp": 1,
|
24 |
+
"rms_norm_eps": 1e-05,
|
25 |
+
"rope_scaling": null,
|
26 |
+
"rope_theta": 10000000.0,
|
27 |
+
"router_aux_loss_coef": 0.001,
|
28 |
+
"router_jitter_noise": 0.0,
|
29 |
+
"sliding_window": null,
|
30 |
+
"tie_word_embeddings": false,
|
31 |
+
"torch_dtype": "bfloat16",
|
32 |
+
"transformers_version": "4.40.0",
|
33 |
+
"use_cache": true,
|
34 |
+
"vocab_size": 64000
|
35 |
+
}
|
model-00001-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6abf7dd9009c216881300216b23fa55bb326c9ad9c4ab46aaad785280ddf277e
|
3 |
+
size 9843074832
|
model-00002-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f59732863b433e06c187fe804cc10eb2ef2df39bc5f1e2561510d26bb4dd318c
|
3 |
+
size 9997201344
|
model-00003-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d7b5c530e796d1e5d271859e9325ef082fc7832f5e5e640fddb95b864b34235
|
3 |
+
size 9967840992
|
model-00004-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d089024a5dfd24b7467f7fa9dc4b4ebcbe0a96ee7a4e4b74e566fa5ee71f1c09
|
3 |
+
size 9806374952
|
model-00005-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b7ed3e46369e2304c53adf03f0d28ab6d602118337f715d2edb9719af4e7edc
|
3 |
+
size 9865066016
|
model-00006-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4de9be693639cf8d9f915434eca71c75cf59267c830a5f2f116d8df862718db8
|
3 |
+
size 9806375000
|
model-00007-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e42c37330d4477da56cad1f124f15501aa1289c44b7ea2d381f4d666ed6f1815
|
3 |
+
size 9806375000
|
model-00008-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab76df8304084303f8f6965ceaa76d3fd990a42722bb6e556c05c18a34302ccc
|
3 |
+
size 9865066016
|
model-00009-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c8f0efb8101de7ae2d241a6fed84ebd729fd5f81a21e01147eac3f3d840fea5
|
3 |
+
size 9806375000
|
model-00010-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:193f302608165e4c1c1ac06e5e2ff37dc9a6f7cd454b3a3f3579a0cc2a0e3aa4
|
3 |
+
size 9997201392
|
model-00011-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af696437e9ba3256fe70d565806d9b0589315d1864c32fd1037d56b3b1158379
|
3 |
+
size 9967841040
|
model-00012-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9145ac496c7d21f1d7fab3a5e0dcff9589335c2a1b31a28954575fc20fa739ed
|
3 |
+
size 9806375000
|
model-00013-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc1d39fbef40ddfd520442bbeb3e2204e04b317b745a31148e84d8fc7c8e3053
|
3 |
+
size 9865066016
|
model-00014-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e09c193ed6ab3f44b9574f2de1a09b3508fdccd52ca83e8612755cd75d15e637
|
3 |
+
size 9806375000
|
model-00015-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d5985b160ff407af391a72e5a66dc155a1dde81912e4e0cca5d1f4f9c1d0c8d
|
3 |
+
size 9806375000
|
model-00016-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca1200537ac70815888c9a78de2f3e5e96f59202fb771a0f6c96d2f7705ca504
|
3 |
+
size 9865066016
|
model-00017-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06af2636895f097735f29ddd7ce38fb20cea0ce70a9e0738fb0a81e64863cc1b
|
3 |
+
size 9806375000
|
model-00018-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b92187af3388170921b2ed3effabbdb00a5d9a4dbc2af2e8474d470e57e9db8
|
3 |
+
size 9997201392
|
model-00019-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec7db9d4d925cc5e640ed189df329e945160b81d8672ef65f64546224e1bc052
|
3 |
+
size 9967841040
|
model-00020-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42a369498e9b5af758f444f3f534a36b70418e51f38d1e9051396a9b6c174e45
|
3 |
+
size 9806375000
|
model-00021-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7da3815794205745e718719042c3435de725435a5673dd8c5cb5c069e4f5b7a6
|
3 |
+
size 9865066016
|
model-00022-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30d8f61f1675845d493b94e4e9a98a0f3f1b6945e032638c90224056904924f6
|
3 |
+
size 9806375000
|
model-00023-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:625f941e62e0e2b4abaae2e6b3c6be126e0d5fe98e9339a2d84e1c7b89a7deea
|
3 |
+
size 9277877616
|
model-00024-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53648a941893688922fb0ddf3bb0fbf4e6dc6058ee091fb81082c44cc1f52da1
|
3 |
+
size 920951840
|
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|startoftext|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|endoftext|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<|startoftext|>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<unk>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
|
3 |
+
size 1033105
|
tokenizer_config.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<|startoftext|>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "<|endoftext|>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"bos_token": "<|startoftext|>",
|
31 |
+
"clean_up_tokenization_spaces": false,
|
32 |
+
"eos_token": "<|endoftext|>",
|
33 |
+
"legacy": true,
|
34 |
+
"model_max_length": 200000,
|
35 |
+
"pad_token": "<|startoftext|>",
|
36 |
+
"sp_model_kwargs": {},
|
37 |
+
"tokenizer_class": "LlamaTokenizer",
|
38 |
+
"unk_token": "<unk>",
|
39 |
+
"use_default_system_prompt": false
|
40 |
+
}
|