Upload folder using huggingface_hub
Browse files
xDAN APUS4.0: xDAN-L2-moe-Random-v4.3-0402
- config.json +34 -0
- mergekit_moe_config.yml +68 -0
- model-00001-of-00024.safetensors +3 -0
- model-00002-of-00024.safetensors +3 -0
- model-00003-of-00024.safetensors +3 -0
- model-00004-of-00024.safetensors +3 -0
- model-00005-of-00024.safetensors +3 -0
- model-00006-of-00024.safetensors +3 -0
- model-00007-of-00024.safetensors +3 -0
- model-00008-of-00024.safetensors +3 -0
- model-00009-of-00024.safetensors +3 -0
- model-00010-of-00024.safetensors +3 -0
- model-00011-of-00024.safetensors +3 -0
- model-00012-of-00024.safetensors +3 -0
- model-00013-of-00024.safetensors +3 -0
- model-00014-of-00024.safetensors +3 -0
- model-00015-of-00024.safetensors +3 -0
- model-00016-of-00024.safetensors +3 -0
- model-00017-of-00024.safetensors +3 -0
- model-00018-of-00024.safetensors +3 -0
- model-00019-of-00024.safetensors +3 -0
- model-00020-of-00024.safetensors +3 -0
- model-00021-of-00024.safetensors +3 -0
- model-00022-of-00024.safetensors +3 -0
- model-00023-of-00024.safetensors +3 -0
- model-00024-of-00024.safetensors +3 -0
- model.safetensors.index.json +0 -0
- special_tokens_map.json +27 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +65 -0
config.json
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "xDAN2099/xDAN-L2-RL-v7.3-Agent-Dlora-0318-APUS-xDAN4.0-e3",
|
3 |
+
"architectures": [
|
4 |
+
"MixtralForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 6,
|
9 |
+
"eos_token_id": 7,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 7168,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 20480,
|
14 |
+
"max_position_embeddings": 32768,
|
15 |
+
"model_type": "mixtral",
|
16 |
+
"num_attention_heads": 56,
|
17 |
+
"num_experts_per_tok": 2,
|
18 |
+
"num_hidden_layers": 60,
|
19 |
+
"num_key_value_heads": 8,
|
20 |
+
"num_local_experts": 4,
|
21 |
+
"output_router_logits": false,
|
22 |
+
"pad_token_id": 0,
|
23 |
+
"pretraining_tp": 1,
|
24 |
+
"rms_norm_eps": 1e-05,
|
25 |
+
"rope_scaling": null,
|
26 |
+
"rope_theta": 10000000.0,
|
27 |
+
"router_aux_loss_coef": 0.001,
|
28 |
+
"sliding_window": null,
|
29 |
+
"tie_word_embeddings": false,
|
30 |
+
"torch_dtype": "bfloat16",
|
31 |
+
"transformers_version": "4.39.2",
|
32 |
+
"use_cache": true,
|
33 |
+
"vocab_size": 64000
|
34 |
+
}
|
mergekit_moe_config.yml
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: xDAN2099/xDAN-L2-RL-v7.3-Agent-Dlora-0318-APUS-xDAN4.0-e3
|
2 |
+
gate_mode: random # Use the "random" gate mode: router gate weights are randomly initialized, so the positive/negative prompts below are not used to build the gates.
|
3 |
+
tokenizer_source: base # Use the base tokenizer for processing inputs.
|
4 |
+
dtype: bfloat16 # Write the merged model's weights in bfloat16, balancing memory footprint and precision.
|
5 |
+
experts_per_token: 2 # Route each token through two of the four experts listed below.
|
6 |
+
experts:
|
7 |
+
- source_model: xDAN2099/xDAN-L2-RL-Mix378-BagelMath-0310-e2-Chat-v7.2-DPO-QDora-0317-epoch05
|
8 |
+
positive_prompts:
|
9 |
+
- "Explain quantum mechanics concepts in simple terms"
|
10 |
+
- "Detailed walkthrough of solving linear algebra problems"
|
11 |
+
- "Interpretation of complex statistical data for research papers"
|
12 |
+
- "Advanced calculus applications in engineering"
|
13 |
+
- "Mathematical modeling for economic forecasts"
|
14 |
+
negative_prompts:
|
15 |
+
- "General knowledge trivia questions"
|
16 |
+
- "Creating a screenplay for a movie"
|
17 |
+
- "Advice on personal relationships"
|
18 |
+
- "Cooking recipes for beginners"
|
19 |
+
- "Trends in digital marketing strategies"
|
20 |
+
|
21 |
+
- source_model: NousResearch/Nous-Hermes-2-Yi-34B
|
22 |
+
positive_prompts:
|
23 |
+
- "Generate creative writing prompts for a novel"
|
24 |
+
- "Dialogue script for a video game scenario"
|
25 |
+
- "Constructing an engaging blog post on technology trends"
|
26 |
+
- "Python coding tips for beginners"
|
27 |
+
- "Developing characters for a fantasy story"
|
28 |
+
negative_prompts:
|
29 |
+
- "In-depth analysis of a medical research paper"
|
30 |
+
- "Theoretical physics problem sets"
|
31 |
+
- "Investment portfolio optimization"
|
32 |
+
- "Architectural design principles"
|
33 |
+
- "Advanced machine learning algorithm explanations"
|
34 |
+
|
35 |
+
- source_model: xDAN2099/xDAN-L2-RL-v7.3-Agent-Dlora-0318-e1
|
36 |
+
positive_prompts:
|
37 |
+
- "Strategies for effective online teaching"
|
38 |
+
- "Guide to writing a technical paper in computer science"
|
39 |
+
- "Explaining software development life cycle (SDLC)"
|
40 |
+
- "Tips for engaging online content creation"
|
41 |
+
- "Overview of the latest web development frameworks"
|
42 |
+
negative_prompts:
|
43 |
+
- "Performing a detailed company financial analysis"
|
44 |
+
- "Creating a workout plan for athletes"
|
45 |
+
- "Restoration tips for classic cars"
|
46 |
+
- "Psychological assessment techniques"
|
47 |
+
- "Surgical techniques in modern medicine"
|
48 |
+
|
49 |
+
- source_model: xDAN2099/xDAN-APUS4-Preference-DPO-0331-v2-e1
|
50 |
+
positive_prompts:
|
51 |
+
- "Mathematics"
|
52 |
+
- "Physics"
|
53 |
+
- "Chemistry"
|
54 |
+
- "Biology"
|
55 |
+
- "Medicine"
|
56 |
+
- "Engineering"
|
57 |
+
- "Computer Science"
|
58 |
+
negative_prompts:
|
59 |
+
- "History"
|
60 |
+
- "Philosophy"
|
61 |
+
- "Linguistics"
|
62 |
+
- "Literature"
|
63 |
+
- "Art and Art History"
|
64 |
+
- "Music Theory and Composition"
|
65 |
+
- "Performing Arts (Theater, Dance)"
|
66 |
+
|
67 |
+
|
68 |
+
#CUDA_VISIBLE_DEVICES='' mergekit-moe xDAN-L2-moe-Random-v4.2-0327.yaml xDAN-L2-moe-Random-v4.2-0327
|
model-00001-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42feec9537e5ba7f180ccead79c4d195fbccf8d1d5789213ebbfa3872bf04671
|
3 |
+
size 9879789120
|
model-00002-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69a5aedaebb481cca6926009ede7c4ada05a72f98e9a48c2b96bad641983abf4
|
3 |
+
size 9865065968
|
model-00003-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4e4faf30493b57a8b4ecc94a7cca7d6b7ea802fa73bb165251119142634c4e8
|
3 |
+
size 9806374952
|
model-00004-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61d2714e9054ac3f654d00fc21f00c283fbc1c906fe615e36cd0649208cc1e31
|
3 |
+
size 9997201336
|
model-00005-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:065ac921e4c5140e42fb2b66aba64f0c068d20c371f94c1ea6b6a5cfcc362adb
|
3 |
+
size 9967841040
|
model-00006-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23d6da37e684c01bca3e50c59e9682b66958a4ba9921086e4951ee1dade64b18
|
3 |
+
size 9806375000
|
model-00007-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e412eb6ba16f046381022b05980f7fda5ca4028d52e08b6d3c010c31ee758cd9
|
3 |
+
size 9865066016
|
model-00008-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6dea951f08c00278ae9a85907d45a2cf7bb0c677c1e71fe5dd053d45adf2f97a
|
3 |
+
size 9806375000
|
model-00009-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f50762b9205d49aa6c7f090b386aa1d24095890260500fd1cd3c868a42a059e3
|
3 |
+
size 9806375000
|
model-00010-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2194c80f26b6d57e5f911fb8088bdee57f8e46231a252d14662debea3ca620ba
|
3 |
+
size 9865066016
|
model-00011-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02a447ba0ef5a3d024382cdab886b75ec40d2b9d47601df5e74a65b0f0925dbe
|
3 |
+
size 9806375000
|
model-00012-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:390128feadc7bb33d2a079cfc1ce077e6cf09db1b2eec26a3275e2688717d178
|
3 |
+
size 9997201392
|
model-00013-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd40e8e51380a32c75842d5b71c2e6433fb6fb7e0e48b169eb9ee22a01e3edac
|
3 |
+
size 9967841040
|
model-00014-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:154126d4b03402225cf6fdb2aaaa78a550f0604f690eb9a2c3670fd073e64781
|
3 |
+
size 9806375000
|
model-00015-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a94c5bbb78a29c0cf869b7b7062a9d5308f64acfae581b131e612d064cbccef4
|
3 |
+
size 9865066016
|
model-00016-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:849bc25ac4db5043e710c14825af3958859e9870d13faf013c5410a658eb8539
|
3 |
+
size 9806375000
|
model-00017-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:443e35413e625b8acba43c512871ba9076ac4d3ec3b95988fc41c6c8342e4dd7
|
3 |
+
size 9806375000
|
model-00018-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fdd7944a06b5fdd5d44f5c06bd32bbe0a289a1aa45eb231f69c8ed520219e44
|
3 |
+
size 9865066016
|
model-00019-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96d755b268c4fbde1b52e3acacfac90aad79e3799c18795420950ac9fe1baa3a
|
3 |
+
size 9806375000
|
model-00020-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ddf0bd1f47615b68624b382ddc7163f15a703746ec293a133e46d2274e000cb
|
3 |
+
size 9997201392
|
model-00021-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68d3dff150421361e0700ead6f3e4c2ff9ef38409d5925abd1c114086ad37f6c
|
3 |
+
size 9967841040
|
model-00022-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbb7e99090ee9b29f44d6da557c02dd4238a51c6f216908b1e44f079632bcb1f
|
3 |
+
size 9806375000
|
model-00023-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6979da55a1439bbff437207a75c3dbf07e54aa66cb14e2730c93cc61eeee8dc0
|
3 |
+
size 9865066016
|
model-00024-of-00024.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cf90071779251dd237a6798e35227b2a7203be2a99ffbe7e16e1f4083008d73
|
3 |
+
size 297048896
|
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"<|im_start|>"
|
4 |
+
],
|
5 |
+
"bos_token": {
|
6 |
+
"content": "<|im_start|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"eos_token": {
|
13 |
+
"content": "<|im_end|>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": false,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false
|
18 |
+
},
|
19 |
+
"pad_token": "<|im_start|>",
|
20 |
+
"unk_token": {
|
21 |
+
"content": "<unk>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": false,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false
|
26 |
+
}
|
27 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
|
3 |
+
size 1033105
|
tokenizer_config.json
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"add_prefix_space": true,
|
5 |
+
"added_tokens_decoder": {
|
6 |
+
"0": {
|
7 |
+
"content": "<unk>",
|
8 |
+
"lstrip": false,
|
9 |
+
"normalized": false,
|
10 |
+
"rstrip": false,
|
11 |
+
"single_word": false,
|
12 |
+
"special": true
|
13 |
+
},
|
14 |
+
"1": {
|
15 |
+
"content": "<|startoftext|>",
|
16 |
+
"lstrip": false,
|
17 |
+
"normalized": false,
|
18 |
+
"rstrip": false,
|
19 |
+
"single_word": false,
|
20 |
+
"special": true
|
21 |
+
},
|
22 |
+
"2": {
|
23 |
+
"content": "<|endoftext|>",
|
24 |
+
"lstrip": false,
|
25 |
+
"normalized": false,
|
26 |
+
"rstrip": false,
|
27 |
+
"single_word": false,
|
28 |
+
"special": true
|
29 |
+
},
|
30 |
+
"6": {
|
31 |
+
"content": "<|im_start|>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false,
|
36 |
+
"special": true
|
37 |
+
},
|
38 |
+
"7": {
|
39 |
+
"content": "<|im_end|>",
|
40 |
+
"lstrip": false,
|
41 |
+
"normalized": false,
|
42 |
+
"rstrip": false,
|
43 |
+
"single_word": false,
|
44 |
+
"special": true
|
45 |
+
}
|
46 |
+
},
|
47 |
+
"additional_special_tokens": [
|
48 |
+
"<|im_start|>"
|
49 |
+
],
|
50 |
+
"bos_token": "<|im_start|>",
|
51 |
+
"chat_template": "{% set system_message = 'You are a helpful assistant named APUS-xDAN-4.0 MoE.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\\n' + system_message + '<|im_end|>\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\\n' + content + '<|im_end|>\\n<|im_start|>assistant\\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\\n' }}{% endif %}{% endfor %}",
|
52 |
+
"clean_up_tokenization_spaces": false,
|
53 |
+
"eos_token": "<|im_end|>",
|
54 |
+
"legacy": true,
|
55 |
+
"model_max_length": 200000,
|
56 |
+
"pad_token": "<|im_start|>",
|
57 |
+
"padding_side": "left",
|
58 |
+
"sp_model_kwargs": {},
|
59 |
+
"spaces_between_special_tokens": false,
|
60 |
+
"split_special_tokens": false,
|
61 |
+
"tokenizer_class": "LlamaTokenizer",
|
62 |
+
"unk_token": "<unk>",
|
63 |
+
"use_default_system_prompt": false,
|
64 |
+
"use_fast": true
|
65 |
+
}
|