Corianas committed on
Commit
cb6a088
1 Parent(s): 4f81c2e

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - moe
5
+ - frankenmoe
6
+ - merge
7
+ - mergekit
8
+ - lazymergekit
9
+ - Corianas/Microllama_Char_88k_step
10
+ base_model:
11
+ - Corianas/Microllama_Char_88k_step
12
+ - Corianas/Microllama_Char_88k_step
13
+ ---
14
+
15
+ # microchar_moe
16
+
17
+ microchar_moe is a Mixture of Experts (MoE) built with [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing) from the following models:
18
+ * [Corianas/Microllama_Char_88k_step](https://huggingface.co/Corianas/Microllama_Char_88k_step)
19
+ * [Corianas/Microllama_Char_88k_step](https://huggingface.co/Corianas/Microllama_Char_88k_step)
20
+
21
+ ## 🧩 Configuration
22
+
23
+ ```yaml
24
+ base_model: Corianas/Microllama_Char_88k_step
25
+ gate_mode: random # one of "hidden", "cheap_embed", or "random"
26
+ dtype: bfloat16 # output dtype (float32, float16, or bfloat16)
27
+ ## (optional)
28
+ # experts_per_token: 2
29
+ experts:
30
+ - source_model: Corianas/Microllama_Char_88k_step
31
+ positive_prompts:
32
+ - ""
33
+ ## (optional)
34
+ # negative_prompts:
35
+ # - "This is a prompt expert_model_1 should not be used for"
36
+ - source_model: Corianas/Microllama_Char_88k_step
37
+ positive_prompts:
38
+ - ""
39
+ ```
40
+
41
+ ## 💻 Usage
42
+
43
+ ```python
44
+ !pip install -qU transformers bitsandbytes accelerate
45
+
46
+ from transformers import AutoTokenizer
47
+ import transformers
48
+ import torch
49
+
50
+ model = "Corianas/microchar_moe"
51
+
52
+ tokenizer = AutoTokenizer.from_pretrained(model)
53
+ pipeline = transformers.pipeline(
54
+ "text-generation",
55
+ model=model,
56
+ model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True},
57
+ )
58
+
59
+ messages = [{"role": "user", "content": "Explain what a Mixture of Experts is in less than 100 words."}]
60
+ prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
61
+ outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
62
+ print(outputs[0]["generated_text"])
63
+ ```
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Corianas/Microllama_Char_88k_step",
3
+ "architectures": [
4
+ "MixtralForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 2048,
14
+ "max_position_embeddings": 2048,
15
+ "model_type": "mixtral",
16
+ "num_attention_heads": 12,
17
+ "num_experts_per_tok": 2,
18
+ "num_hidden_layers": 12,
19
+ "num_key_value_heads": 12,
20
+ "num_local_experts": 2,
21
+ "output_router_logits": false,
22
+ "pad_token_id": 0,
23
+ "pretraining_tp": 1,
24
+ "rms_norm_eps": 1e-05,
25
+ "rope_scaling": null,
26
+ "rope_theta": 10000.0,
27
+ "router_aux_loss_coef": 0.001,
28
+ "sliding_window": null,
29
+ "tie_word_embeddings": true,
30
+ "torch_dtype": "bfloat16",
31
+ "transformers_version": "4.39.1",
32
+ "use_cache": true,
33
+ "vocab_size": 341
34
+ }
mergekit_moe_config.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_model: Corianas/Microllama_Char_88k_step
3
+ gate_mode: random # one of "hidden", "cheap_embed", or "random"
4
+ dtype: bfloat16 # output dtype (float32, float16, or bfloat16)
5
+ ## (optional)
6
+ # experts_per_token: 2
7
+ experts:
8
+ - source_model: Corianas/Microllama_Char_88k_step
9
+ positive_prompts:
10
+ - ""
11
+ ## (optional)
12
+ # negative_prompts:
13
+ # - "This is a prompt expert_model_1 should not be used for"
14
+ - source_model: Corianas/Microllama_Char_88k_step
15
+ positive_prompts:
16
+ - ""
model-00001-of-00001.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:681ff26ed7201c5981ae29a40350b5a0b0b211fee1d17309cbee821834e14d1f
3
+ size 284257216
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata": {"mergekit_version": "0.0.4"}, "weight_map": {"model.embed_tokens.weight": "model-00001-of-00001.safetensors", "model.norm.weight": "model-00001-of-00001.safetensors", "lm_head.weight": "model-00001-of-00001.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.2.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.3.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.4.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.5.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.6.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.7.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.8.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.9.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.10.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.11.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", 
"model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w2.weight": 
"model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", 
"model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", 
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.k_proj.weight": 
"model-00001-of-00001.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", 
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors"}}
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
@@ -0,0 +1,482 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<unk>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<s>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "</s>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ }
33
+ ],
34
+ "normalizer": {
35
+ "type": "Sequence",
36
+ "normalizers": [
37
+ {
38
+ "type": "Prepend",
39
+ "prepend": "▁"
40
+ },
41
+ {
42
+ "type": "Replace",
43
+ "pattern": {
44
+ "String": " "
45
+ },
46
+ "content": "▁"
47
+ }
48
+ ]
49
+ },
50
+ "pre_tokenizer": null,
51
+ "post_processor": {
52
+ "type": "TemplateProcessing",
53
+ "single": [
54
+ {
55
+ "SpecialToken": {
56
+ "id": "<s>",
57
+ "type_id": 0
58
+ }
59
+ },
60
+ {
61
+ "Sequence": {
62
+ "id": "A",
63
+ "type_id": 0
64
+ }
65
+ }
66
+ ],
67
+ "pair": [
68
+ {
69
+ "SpecialToken": {
70
+ "id": "<s>",
71
+ "type_id": 0
72
+ }
73
+ },
74
+ {
75
+ "Sequence": {
76
+ "id": "A",
77
+ "type_id": 0
78
+ }
79
+ },
80
+ {
81
+ "SpecialToken": {
82
+ "id": "<s>",
83
+ "type_id": 1
84
+ }
85
+ },
86
+ {
87
+ "Sequence": {
88
+ "id": "B",
89
+ "type_id": 1
90
+ }
91
+ }
92
+ ],
93
+ "special_tokens": {
94
+ "<s>": {
95
+ "id": "<s>",
96
+ "ids": [
97
+ 1
98
+ ],
99
+ "tokens": [
100
+ "<s>"
101
+ ]
102
+ }
103
+ }
104
+ },
105
+ "decoder": {
106
+ "type": "Sequence",
107
+ "decoders": [
108
+ {
109
+ "type": "Replace",
110
+ "pattern": {
111
+ "String": "▁"
112
+ },
113
+ "content": " "
114
+ },
115
+ {
116
+ "type": "ByteFallback"
117
+ },
118
+ {
119
+ "type": "Fuse"
120
+ },
121
+ {
122
+ "type": "Strip",
123
+ "content": " ",
124
+ "start": 1,
125
+ "stop": 0
126
+ }
127
+ ]
128
+ },
129
+ "model": {
130
+ "type": "BPE",
131
+ "dropout": null,
132
+ "unk_token": "<unk>",
133
+ "continuing_subword_prefix": null,
134
+ "end_of_word_suffix": null,
135
+ "fuse_unk": true,
136
+ "byte_fallback": true,
137
+ "vocab": {
138
+ "<unk>": 0,
139
+ "<s>": 1,
140
+ "</s>": 2,
141
+ "<0x00>": 3,
142
+ "<0x01>": 4,
143
+ "<0x02>": 5,
144
+ "<0x03>": 6,
145
+ "<0x04>": 7,
146
+ "<0x05>": 8,
147
+ "<0x06>": 9,
148
+ "<0x07>": 10,
149
+ "<0x08>": 11,
150
+ "<0x09>": 12,
151
+ "<0x0A>": 13,
152
+ "<0x0B>": 14,
153
+ "<0x0C>": 15,
154
+ "<0x0D>": 16,
155
+ "<0x0E>": 17,
156
+ "<0x0F>": 18,
157
+ "<0x10>": 19,
158
+ "<0x11>": 20,
159
+ "<0x12>": 21,
160
+ "<0x13>": 22,
161
+ "<0x14>": 23,
162
+ "<0x15>": 24,
163
+ "<0x16>": 25,
164
+ "<0x17>": 26,
165
+ "<0x18>": 27,
166
+ "<0x19>": 28,
167
+ "<0x1A>": 29,
168
+ "<0x1B>": 30,
169
+ "<0x1C>": 31,
170
+ "<0x1D>": 32,
171
+ "<0x1E>": 33,
172
+ "<0x1F>": 34,
173
+ "<0x20>": 35,
174
+ "<0x21>": 36,
175
+ "<0x22>": 37,
176
+ "<0x23>": 38,
177
+ "<0x24>": 39,
178
+ "<0x25>": 40,
179
+ "<0x26>": 41,
180
+ "<0x27>": 42,
181
+ "<0x28>": 43,
182
+ "<0x29>": 44,
183
+ "<0x2A>": 45,
184
+ "<0x2B>": 46,
185
+ "<0x2C>": 47,
186
+ "<0x2D>": 48,
187
+ "<0x2E>": 49,
188
+ "<0x2F>": 50,
189
+ "<0x30>": 51,
190
+ "<0x31>": 52,
191
+ "<0x32>": 53,
192
+ "<0x33>": 54,
193
+ "<0x34>": 55,
194
+ "<0x35>": 56,
195
+ "<0x36>": 57,
196
+ "<0x37>": 58,
197
+ "<0x38>": 59,
198
+ "<0x39>": 60,
199
+ "<0x3A>": 61,
200
+ "<0x3B>": 62,
201
+ "<0x3C>": 63,
202
+ "<0x3D>": 64,
203
+ "<0x3E>": 65,
204
+ "<0x3F>": 66,
205
+ "<0x40>": 67,
206
+ "<0x41>": 68,
207
+ "<0x42>": 69,
208
+ "<0x43>": 70,
209
+ "<0x44>": 71,
210
+ "<0x45>": 72,
211
+ "<0x46>": 73,
212
+ "<0x47>": 74,
213
+ "<0x48>": 75,
214
+ "<0x49>": 76,
215
+ "<0x4A>": 77,
216
+ "<0x4B>": 78,
217
+ "<0x4C>": 79,
218
+ "<0x4D>": 80,
219
+ "<0x4E>": 81,
220
+ "<0x4F>": 82,
221
+ "<0x50>": 83,
222
+ "<0x51>": 84,
223
+ "<0x52>": 85,
224
+ "<0x53>": 86,
225
+ "<0x54>": 87,
226
+ "<0x55>": 88,
227
+ "<0x56>": 89,
228
+ "<0x57>": 90,
229
+ "<0x58>": 91,
230
+ "<0x59>": 92,
231
+ "<0x5A>": 93,
232
+ "<0x5B>": 94,
233
+ "<0x5C>": 95,
234
+ "<0x5D>": 96,
235
+ "<0x5E>": 97,
236
+ "<0x5F>": 98,
237
+ "<0x60>": 99,
238
+ "<0x61>": 100,
239
+ "<0x62>": 101,
240
+ "<0x63>": 102,
241
+ "<0x64>": 103,
242
+ "<0x65>": 104,
243
+ "<0x66>": 105,
244
+ "<0x67>": 106,
245
+ "<0x68>": 107,
246
+ "<0x69>": 108,
247
+ "<0x6A>": 109,
248
+ "<0x6B>": 110,
249
+ "<0x6C>": 111,
250
+ "<0x6D>": 112,
251
+ "<0x6E>": 113,
252
+ "<0x6F>": 114,
253
+ "<0x70>": 115,
254
+ "<0x71>": 116,
255
+ "<0x72>": 117,
256
+ "<0x73>": 118,
257
+ "<0x74>": 119,
258
+ "<0x75>": 120,
259
+ "<0x76>": 121,
260
+ "<0x77>": 122,
261
+ "<0x78>": 123,
262
+ "<0x79>": 124,
263
+ "<0x7A>": 125,
264
+ "<0x7B>": 126,
265
+ "<0x7C>": 127,
266
+ "<0x7D>": 128,
267
+ "<0x7E>": 129,
268
+ "<0x7F>": 130,
269
+ "<0x80>": 131,
270
+ "<0x81>": 132,
271
+ "<0x82>": 133,
272
+ "<0x83>": 134,
273
+ "<0x84>": 135,
274
+ "<0x85>": 136,
275
+ "<0x86>": 137,
276
+ "<0x87>": 138,
277
+ "<0x88>": 139,
278
+ "<0x89>": 140,
279
+ "<0x8A>": 141,
280
+ "<0x8B>": 142,
281
+ "<0x8C>": 143,
282
+ "<0x8D>": 144,
283
+ "<0x8E>": 145,
284
+ "<0x8F>": 146,
285
+ "<0x90>": 147,
286
+ "<0x91>": 148,
287
+ "<0x92>": 149,
288
+ "<0x93>": 150,
289
+ "<0x94>": 151,
290
+ "<0x95>": 152,
291
+ "<0x96>": 153,
292
+ "<0x97>": 154,
293
+ "<0x98>": 155,
294
+ "<0x99>": 156,
295
+ "<0x9A>": 157,
296
+ "<0x9B>": 158,
297
+ "<0x9C>": 159,
298
+ "<0x9D>": 160,
299
+ "<0x9E>": 161,
300
+ "<0x9F>": 162,
301
+ "<0xA0>": 163,
302
+ "<0xA1>": 164,
303
+ "<0xA2>": 165,
304
+ "<0xA3>": 166,
305
+ "<0xA4>": 167,
306
+ "<0xA5>": 168,
307
+ "<0xA6>": 169,
308
+ "<0xA7>": 170,
309
+ "<0xA8>": 171,
310
+ "<0xA9>": 172,
311
+ "<0xAA>": 173,
312
+ "<0xAB>": 174,
313
+ "<0xAC>": 175,
314
+ "<0xAD>": 176,
315
+ "<0xAE>": 177,
316
+ "<0xAF>": 178,
317
+ "<0xB0>": 179,
318
+ "<0xB1>": 180,
319
+ "<0xB2>": 181,
320
+ "<0xB3>": 182,
321
+ "<0xB4>": 183,
322
+ "<0xB5>": 184,
323
+ "<0xB6>": 185,
324
+ "<0xB7>": 186,
325
+ "<0xB8>": 187,
326
+ "<0xB9>": 188,
327
+ "<0xBA>": 189,
328
+ "<0xBB>": 190,
329
+ "<0xBC>": 191,
330
+ "<0xBD>": 192,
331
+ "<0xBE>": 193,
332
+ "<0xBF>": 194,
333
+ "<0xC0>": 195,
334
+ "<0xC1>": 196,
335
+ "<0xC2>": 197,
336
+ "<0xC3>": 198,
337
+ "<0xC4>": 199,
338
+ "<0xC5>": 200,
339
+ "<0xC6>": 201,
340
+ "<0xC7>": 202,
341
+ "<0xC8>": 203,
342
+ "<0xC9>": 204,
343
+ "<0xCA>": 205,
344
+ "<0xCB>": 206,
345
+ "<0xCC>": 207,
346
+ "<0xCD>": 208,
347
+ "<0xCE>": 209,
348
+ "<0xCF>": 210,
349
+ "<0xD0>": 211,
350
+ "<0xD1>": 212,
351
+ "<0xD2>": 213,
352
+ "<0xD3>": 214,
353
+ "<0xD4>": 215,
354
+ "<0xD5>": 216,
355
+ "<0xD6>": 217,
356
+ "<0xD7>": 218,
357
+ "<0xD8>": 219,
358
+ "<0xD9>": 220,
359
+ "<0xDA>": 221,
360
+ "<0xDB>": 222,
361
+ "<0xDC>": 223,
362
+ "<0xDD>": 224,
363
+ "<0xDE>": 225,
364
+ "<0xDF>": 226,
365
+ "<0xE0>": 227,
366
+ "<0xE1>": 228,
367
+ "<0xE2>": 229,
368
+ "<0xE3>": 230,
369
+ "<0xE4>": 231,
370
+ "<0xE5>": 232,
371
+ "<0xE6>": 233,
372
+ "<0xE7>": 234,
373
+ "<0xE8>": 235,
374
+ "<0xE9>": 236,
375
+ "<0xEA>": 237,
376
+ "<0xEB>": 238,
377
+ "<0xEC>": 239,
378
+ "<0xED>": 240,
379
+ "<0xEE>": 241,
380
+ "<0xEF>": 242,
381
+ "<0xF0>": 243,
382
+ "<0xF1>": 244,
383
+ "<0xF2>": 245,
384
+ "<0xF3>": 246,
385
+ "<0xF4>": 247,
386
+ "<0xF5>": 248,
387
+ "<0xF6>": 249,
388
+ "<0xF7>": 250,
389
+ "<0xF8>": 251,
390
+ "<0xF9>": 252,
391
+ "<0xFA>": 253,
392
+ "<0xFB>": 254,
393
+ "<0xFC>": 255,
394
+ "<0xFD>": 256,
395
+ "<0xFE>": 257,
396
+ "<0xFF>": 258,
397
+ "▁": 259,
398
+ "e": 260,
399
+ "t": 261,
400
+ "a": 262,
401
+ "o": 263,
402
+ "h": 264,
403
+ "n": 265,
404
+ "s": 266,
405
+ "i": 267,
406
+ "r": 268,
407
+ "d": 269,
408
+ "l": 270,
409
+ "u": 271,
410
+ "w": 272,
411
+ "m": 273,
412
+ "↨": 274,
413
+ "g": 275,
414
+ "c": 276,
415
+ "f": 277,
416
+ "y": 278,
417
+ ".": 279,
418
+ "p": 280,
419
+ ",": 281,
420
+ "b": 282,
421
+ "\r": 283,
422
+ "k": 284,
423
+ "v": 285,
424
+ "\"": 286,
425
+ "'": 287,
426
+ "j": 288,
427
+ "x": 289,
428
+ "z": 290,
429
+ "q": 291,
430
+ "§": 292,
431
+ "?": 293,
432
+ "-": 294,
433
+ "!": 295,
434
+ ":": 296,
435
+ "1": 297,
436
+ "0": 298,
437
+ "2": 299,
438
+ "5": 300,
439
+ "3": 301,
440
+ ";": 302,
441
+ "4": 303,
442
+ "9": 304,
443
+ "8": 305,
444
+ "$": 306,
445
+ "6": 307,
446
+ "7": 308,
447
+ "+": 309,
448
+ "=": 310,
449
+ ")": 311,
450
+ "(": 312,
451
+ "&": 313,
452
+ "/": 314,
453
+ "%": 315,
454
+ "^": 316,
455
+ "*": 317,
456
+ "`": 318,
457
+ "_": 319,
458
+ "]": 320,
459
+ "<": 321,
460
+ "[": 322,
461
+ "\\": 323,
462
+ ">": 324,
463
+ "{": 325,
464
+ "}": 326,
465
+ "|": 327,
466
+ "¼": 328,
467
+ "½": 329,
468
+ "π": 330,
469
+ "←": 331,
470
+ "↑": 332,
471
+ "→": 333,
472
+ "↓": 334,
473
+ "▲": 335,
474
+ "►": 336,
475
+ "▼": 337,
476
+ "◄": 338,
477
+ "♪": 339,
478
+ "♫": 340
479
+ },
480
+ "merges": []
481
+ }
482
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a9ecce7fdafd04b12010f1b0b21feb89a2c0495b6ff899fac1ba9a285f4abc5
3
+ size 5543
tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "legacy": true,
35
+ "model_max_length": 1000000000000000019884624838656,
36
+ "pad_token": "<s>",
37
+ "sp_model_kwargs": {},
38
+ "spaces_between_special_tokens": false,
39
+ "tokenizer_class": "LlamaTokenizer",
40
+ "unk_token": "<unk>",
41
+ "use_default_system_prompt": false
42
+ }