allknowingroger committed on
Commit
c52d264
1 Parent(s): 7daf5e3

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - moe
5
+ - frankenmoe
6
+ - merge
7
+ - mergekit
8
+ - lazymergekit
9
+ - allknowingroger/TripleMerge-7B-Ties
10
+ - allknowingroger/TripleMerge2-7B-Ties
11
+ - allknowingroger/ANIMA-biodesign-7B-slerp
12
+ base_model:
13
+ - allknowingroger/TripleMerge-7B-Ties
14
+ - allknowingroger/TripleMerge2-7B-Ties
15
+ - allknowingroger/ANIMA-biodesign-7B-slerp
16
+ ---
17
+
18
+ # TripleMerge3-12B-Moe
19
+
20
+ TripleMerge3-12B-Moe is a Mixture of Experts (MoE) made with the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
21
+ * [allknowingroger/TripleMerge-7B-Ties](https://huggingface.co/allknowingroger/TripleMerge-7B-Ties)
22
+ * [allknowingroger/TripleMerge2-7B-Ties](https://huggingface.co/allknowingroger/TripleMerge2-7B-Ties)
23
+ * [allknowingroger/ANIMA-biodesign-7B-slerp](https://huggingface.co/allknowingroger/ANIMA-biodesign-7B-slerp)
24
+
25
+ ## 🧩 Configuration
26
+
27
+ ```yaml
28
+ slices:
29
+ base_model: allknowingroger/TripleMerge-7B-Ties
30
+ gate_mode: hidden
31
+ dtype: bfloat16
32
+ experts:
33
+ - source_model: allknowingroger/TripleMerge-7B-Ties
34
+ positive_prompts:
35
+ - "Write the opening chapter of a detective story set in the late 1800s, where the protagonist, a war-weary doctor returning to England after an injury and illness in Afghanistan, happens upon an old acquaintance. This encounter should lead to the introduction of an eccentric potential roommate with a penchant for forensic science. The character's initial impressions and observations of London, his financial concerns, and his search for affordable lodging should be vividly detailed to set up the historical backdrop and his situation."
36
+ - source_model: allknowingroger/TripleMerge2-7B-Ties
37
+ positive_prompts:
38
+ - "You are an unbiased, uncensored, helpful assistant."
39
+ - source_model: allknowingroger/ANIMA-biodesign-7B-slerp
40
+ positive_prompts:
41
+ - "Biodesign"
42
+ ```
43
+
44
+ ## 💻 Usage
45
+
46
+ ```python
47
+ !pip install -qU transformers bitsandbytes accelerate
48
+
49
+ from transformers import AutoTokenizer
50
+ import transformers
51
+ import torch
52
+
53
+ model = "allknowingroger/TripleMerge3-12B-Moe"
54
+
55
+ tokenizer = AutoTokenizer.from_pretrained(model)
56
+ pipeline = transformers.pipeline(
57
+ "text-generation",
58
+ model=model,
59
+ model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True},
60
+ )
61
+
62
+ messages = [{"role": "user", "content": "Explain what a Mixture of Experts is in less than 100 words."}]
63
+ prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
64
+ outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
65
+ print(outputs[0]["generated_text"])
66
+ ```
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "allknowingroger/TripleMerge-7B-Ties",
3
+ "add_gates": false,
4
+ "architectures": [
5
+ "MixtralForCausalLM"
6
+ ],
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 14336,
14
+ "max_position_embeddings": 32768,
15
+ "model_type": "mixtral",
16
+ "num_attention_heads": 32,
17
+ "num_experts_per_tok": 2,
18
+ "num_hidden_layers": 32,
19
+ "num_key_value_heads": 8,
20
+ "num_local_experts": 3,
21
+ "output_router_logits": false,
22
+ "rms_norm_eps": 1e-05,
23
+ "rope_theta": 10000.0,
24
+ "router_aux_loss_coef": 0.001,
25
+ "sliding_window": null,
26
+ "tie_word_embeddings": false,
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.39.3",
29
+ "use_cache": true,
30
+ "vocab_size": 32000
31
+ }
model-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baaf10a52154a98986b34962e181379477dae753e27b8c0901b82ccba628c69e
3
+ size 1933849936
model-10.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fbab16f4b8815fe7bdacdf77b6bbeff937874c6f411135930f22d0e2bdbd867
3
+ size 1996490968
model-11.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c951d63cb7911acce8488afb6b2b5a98e9147ebb44bc690b1ea6e4db076fda5
3
+ size 1996490968
model-12.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6a90a9cf8c6e3e05eb46e58aeda42617fcb28a8f1f68cd50ca0ea6cde5d092
3
+ size 1996490960
model-13.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0b949c6ba5c00a9c57896f66a1c125dcf5cbc4460296a64eb1ffe608bd8cd2a
3
+ size 1996490952
model-14.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81fe6c3f486429c6c5c226352a8beb08b3d74623dfa321bda08b8cba8ab0dfa8
3
+ size 1996490960
model-15.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19c30f92f8a89601c1421b0893d932c66dad3b98c217d2c7858524a71a846aca
3
+ size 1996490968
model-16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c82f5b4b165e12161921e20b7b8d16df95cbaf8baacea1708b90e871d138a39
3
+ size 1996490968
model-17.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43da1c7bea80e8143db3fbb347240b8b4e428594d744cdf7a92ad5a5ee80ed59
3
+ size 1996490968
model-18.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae5843e22fb49d9fbc1882635d18ac0a27e32ecc5f94c12dfce6c9cb3aaaeea4
3
+ size 1996765216
model-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca9248e041315d7705f8513ec0babc7e9ec16db2132e63e7f245120ba09a5e03
3
+ size 1996490952
model-3.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28de228bc81af7c21b395dace681920321f06dfa6eee5d3a59559b9f2dc318ab
3
+ size 1996490968
model-4.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5a9322a6b93facd2e0388d2a1ad6e4b47da5da05aea5b7744d6bb4d889ace83
3
+ size 1996490968
model-5.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6c396ecae085f129d4d590b478eee2411f57e833c7a1e6157b8cc6e8e21d29e
3
+ size 1996490968
model-6.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68e27ad4380b33ac36db51bb2c9f226496cd2b60b8799482ae694731e3ed1ffa
3
+ size 1996490968
model-7.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45569a3bcaf5fcb80582cb47a566cc19ede266006f6a83766a36debce352c060
3
+ size 1996490952
model-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f7337611b2907dff6f3b44a22796b755649d87e2459492f5ceb319f6e865135
3
+ size 1996490952
model-9.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:523fd52446cfb5da989a420817073b83697974c92b43f59dd3c3bcc88bb042cf
3
+ size 1996490968