xDAN2099 committed on
Commit
43d1e0f
1 Parent(s): 68582d2

Upload folder using huggingface_hub

Browse files

xDAN APUS4.0: xDAN-L2-moe-Random-v4.3-0402

config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "xDAN2099/xDAN-L2-RL-v7.3-Agent-Dlora-0318-APUS-xDAN4.0-e3",
3
+ "architectures": [
4
+ "MixtralForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 6,
9
+ "eos_token_id": 7,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 7168,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 20480,
14
+ "max_position_embeddings": 32768,
15
+ "model_type": "mixtral",
16
+ "num_attention_heads": 56,
17
+ "num_experts_per_tok": 2,
18
+ "num_hidden_layers": 60,
19
+ "num_key_value_heads": 8,
20
+ "num_local_experts": 4,
21
+ "output_router_logits": false,
22
+ "pad_token_id": 0,
23
+ "pretraining_tp": 1,
24
+ "rms_norm_eps": 1e-05,
25
+ "rope_scaling": null,
26
+ "rope_theta": 10000000.0,
27
+ "router_aux_loss_coef": 0.001,
28
+ "sliding_window": null,
29
+ "tie_word_embeddings": false,
30
+ "torch_dtype": "bfloat16",
31
+ "transformers_version": "4.39.2",
32
+ "use_cache": true,
33
+ "vocab_size": 64000
34
+ }
mergekit_moe_config.yml ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: xDAN2099/xDAN-L2-RL-v7.3-Agent-Dlora-0318-APUS-xDAN4.0-e3
2
+ gate_mode: random # Use the "random" gate mode: router gates are randomly initialized instead of being computed from the prompts' hidden states.
3
+ tokenizer_source: base # Use the base tokenizer for processing inputs.
4
+ dtype: bfloat16 # Use bfloat16 data type for model output, balancing performance and precision.
5
+ experts_per_token: 2 # Assign two experts per token for enhanced decision-making.
6
+ experts:
7
+ - source_model: xDAN2099/xDAN-L2-RL-Mix378-BagelMath-0310-e2-Chat-v7.2-DPO-QDora-0317-epoch05
8
+ positive_prompts:
9
+ - "Explain quantum mechanics concepts in simple terms"
10
+ - "Detailed walkthrough of solving linear algebra problems"
11
+ - "Interpretation of complex statistical data for research papers"
12
+ - "Advanced calculus applications in engineering"
13
+ - "Mathematical modeling for economic forecasts"
14
+ negative_prompts:
15
+ - "General knowledge trivia questions"
16
+ - "Creating a screenplay for a movie"
17
+ - "Advice on personal relationships"
18
+ - "Cooking recipes for beginners"
19
+ - "Trends in digital marketing strategies"
20
+
21
+ - source_model: NousResearch/Nous-Hermes-2-Yi-34B
22
+ positive_prompts:
23
+ - "Generate creative writing prompts for a novel"
24
+ - "Dialogue script for a video game scenario"
25
+ - "Constructing an engaging blog post on technology trends"
26
+ - "Python coding tips for beginners"
27
+ - "Developing characters for a fantasy story"
28
+ negative_prompts:
29
+ - "In-depth analysis of a medical research paper"
30
+ - "Theoretical physics problem sets"
31
+ - "Investment portfolio optimization"
32
+ - "Architectural design principles"
33
+ - "Advanced machine learning algorithm explanations"
34
+
35
+ - source_model: xDAN2099/xDAN-L2-RL-v7.3-Agent-Dlora-0318-e1
36
+ positive_prompts:
37
+ - "Strategies for effective online teaching"
38
+ - "Guide to writing a technical paper in computer science"
39
+ - "Explaining software development life cycle (SDLC)"
40
+ - "Tips for engaging online content creation"
41
+ - "Overview of the latest web development frameworks"
42
+ negative_prompts:
43
+ - "Performing a detailed company financial analysis"
44
+ - "Creating a workout plan for athletes"
45
+ - "Restoration tips for classic cars"
46
+ - "Psychological assessment techniques"
47
+ - "Surgical techniques in modern medicine"
48
+
49
+ - source_model: xDAN2099/xDAN-APUS4-Preference-DPO-0331-v2-e1
50
+ positive_prompts:
51
+ - "Mathematics"
52
+ - "Physics"
53
+ - "Chemistry"
54
+ - "Biology"
55
+ - "Medicine"
56
+ - "Engineering"
57
+ - "Computer Science"
58
+ negative_prompts:
59
+ - "History"
60
+ - "Philosophy"
61
+ - "Linguistics"
62
+ - "Literature"
63
+ - "Art and Art History"
64
+ - "Music Theory and Composition"
65
+ - "Performing Arts (Theater, Dance)"
66
+
67
+
68
+ #CUDA_VISIBLE_DEVICES='' mergekit-moe xDAN-L2-moe-Random-v4.2-0327.yaml xDAN-L2-moe-Random-v4.2-0327
model-00001-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42feec9537e5ba7f180ccead79c4d195fbccf8d1d5789213ebbfa3872bf04671
3
+ size 9879789120
model-00002-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69a5aedaebb481cca6926009ede7c4ada05a72f98e9a48c2b96bad641983abf4
3
+ size 9865065968
model-00003-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4e4faf30493b57a8b4ecc94a7cca7d6b7ea802fa73bb165251119142634c4e8
3
+ size 9806374952
model-00004-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61d2714e9054ac3f654d00fc21f00c283fbc1c906fe615e36cd0649208cc1e31
3
+ size 9997201336
model-00005-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:065ac921e4c5140e42fb2b66aba64f0c068d20c371f94c1ea6b6a5cfcc362adb
3
+ size 9967841040
model-00006-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23d6da37e684c01bca3e50c59e9682b66958a4ba9921086e4951ee1dade64b18
3
+ size 9806375000
model-00007-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e412eb6ba16f046381022b05980f7fda5ca4028d52e08b6d3c010c31ee758cd9
3
+ size 9865066016
model-00008-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dea951f08c00278ae9a85907d45a2cf7bb0c677c1e71fe5dd053d45adf2f97a
3
+ size 9806375000
model-00009-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f50762b9205d49aa6c7f090b386aa1d24095890260500fd1cd3c868a42a059e3
3
+ size 9806375000
model-00010-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2194c80f26b6d57e5f911fb8088bdee57f8e46231a252d14662debea3ca620ba
3
+ size 9865066016
model-00011-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02a447ba0ef5a3d024382cdab886b75ec40d2b9d47601df5e74a65b0f0925dbe
3
+ size 9806375000
model-00012-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:390128feadc7bb33d2a079cfc1ce077e6cf09db1b2eec26a3275e2688717d178
3
+ size 9997201392
model-00013-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd40e8e51380a32c75842d5b71c2e6433fb6fb7e0e48b169eb9ee22a01e3edac
3
+ size 9967841040
model-00014-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:154126d4b03402225cf6fdb2aaaa78a550f0604f690eb9a2c3670fd073e64781
3
+ size 9806375000
model-00015-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a94c5bbb78a29c0cf869b7b7062a9d5308f64acfae581b131e612d064cbccef4
3
+ size 9865066016
model-00016-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:849bc25ac4db5043e710c14825af3958859e9870d13faf013c5410a658eb8539
3
+ size 9806375000
model-00017-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:443e35413e625b8acba43c512871ba9076ac4d3ec3b95988fc41c6c8342e4dd7
3
+ size 9806375000
model-00018-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fdd7944a06b5fdd5d44f5c06bd32bbe0a289a1aa45eb231f69c8ed520219e44
3
+ size 9865066016
model-00019-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96d755b268c4fbde1b52e3acacfac90aad79e3799c18795420950ac9fe1baa3a
3
+ size 9806375000
model-00020-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ddf0bd1f47615b68624b382ddc7163f15a703746ec293a133e46d2274e000cb
3
+ size 9997201392
model-00021-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68d3dff150421361e0700ead6f3e4c2ff9ef38409d5925abd1c114086ad37f6c
3
+ size 9967841040
model-00022-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbb7e99090ee9b29f44d6da557c02dd4238a51c6f216908b1e44f079632bcb1f
3
+ size 9806375000
model-00023-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6979da55a1439bbff437207a75c3dbf07e54aa66cb14e2730c93cc61eeee8dc0
3
+ size 9865066016
model-00024-of-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cf90071779251dd237a6798e35227b2a7203be2a99ffbe7e16e1f4083008d73
3
+ size 297048896
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>"
4
+ ],
5
+ "bos_token": {
6
+ "content": "<|im_start|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "content": "<|im_end|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "pad_token": "<|im_start|>",
20
+ "unk_token": {
21
+ "content": "<unk>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ }
27
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
3
+ size 1033105
tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<|startoftext|>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "<|endoftext|>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "6": {
31
+ "content": "<|im_start|>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "7": {
39
+ "content": "<|im_end|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ }
46
+ },
47
+ "additional_special_tokens": [
48
+ "<|im_start|>"
49
+ ],
50
+ "bos_token": "<|im_start|>",
51
+ "chat_template": "{% set system_message = 'You are a helpful assistant named APUS-xDAN-4.0 MoE.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\\n' + system_message + '<|im_end|>\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\\n' + content + '<|im_end|>\\n<|im_start|>assistant\\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\\n' }}{% endif %}{% endfor %}",
52
+ "clean_up_tokenization_spaces": false,
53
+ "eos_token": "<|im_end|>",
54
+ "legacy": true,
55
+ "model_max_length": 200000,
56
+ "pad_token": "<|im_start|>",
57
+ "padding_side": "left",
58
+ "sp_model_kwargs": {},
59
+ "spaces_between_special_tokens": false,
60
+ "split_special_tokens": false,
61
+ "tokenizer_class": "LlamaTokenizer",
62
+ "unk_token": "<unk>",
63
+ "use_default_system_prompt": false,
64
+ "use_fast": true
65
+ }