Duplicate from LeroyDyer/Mixtral_AI_CyberTron_Ultra
Browse filesCo-authored-by: leroy Samuel Dyer <LeroyDyer@users.noreply.huggingface.co>
- .gitattributes +35 -0
- README.md +102 -0
- config.json +28 -0
- generation_config.json +7 -0
- model-00001-of-00008.safetensors +3 -0
- model-00002-of-00008.safetensors +3 -0
- model-00003-of-00008.safetensors +3 -0
- model-00004-of-00008.safetensors +3 -0
- model-00005-of-00008.safetensors +3 -0
- model-00006-of-00008.safetensors +3 -0
- model-00007-of-00008.safetensors +3 -0
- model-00008-of-00008.safetensors +3 -0
- model.safetensors.index.json +298 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +40 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
+
license: apache-2.0
|
5 |
+
tags:
|
6 |
+
- text-generation-inference
|
7 |
+
- transformers
|
8 |
+
- unsloth
|
9 |
+
- mistral
|
10 |
+
- trl
|
11 |
+
- code
|
12 |
+
- medical
|
13 |
+
- farmer
|
14 |
+
- doctor
|
15 |
+
- Mega-Series
|
16 |
+
- Cyber-Series
|
17 |
+
- Role-Play
|
18 |
+
- Self-Rag
|
19 |
+
- ThinkingBot
|
20 |
+
base_model: LeroyDyer/Mixtral_AI_CyberTron_Ultra
|
21 |
+
metrics:
|
22 |
+
- accuracy
|
23 |
+
- bertscore
|
24 |
+
- bleu
|
25 |
+
- brier_score
|
26 |
+
- cer
|
27 |
+
- character
|
28 |
+
- charcut_mt
|
29 |
+
- chrf
|
30 |
+
- code_eval
|
31 |
+
library_name: transformers
|
32 |
+
datasets:
|
33 |
+
- gretelai/synthetic_text_to_sql
|
34 |
+
- HuggingFaceTB/cosmopedia
|
35 |
+
- teknium/OpenHermes-2.5
|
36 |
+
- Open-Orca/SlimOrca
|
37 |
+
- Open-Orca/OpenOrca
|
38 |
+
- cognitivecomputations/dolphin-coder
|
39 |
+
- databricks/databricks-dolly-15k
|
40 |
+
- yahma/alpaca-cleaned
|
41 |
+
- uonlp/CulturaX
|
42 |
+
- mwitiderrick/SwahiliPlatypus
|
43 |
+
- swahili
|
44 |
+
- Rogendo/English-Swahili-Sentence-Pairs
|
45 |
+
- ise-uiuc/Magicoder-Evol-Instruct-110K
|
46 |
+
- meta-math/MetaMathQA
|
47 |
+
---
|
48 |
+
|
49 |
+
# Uploaded model
|
50 |
+
|
51 |
+
- **Developed by:** LeroyDyer
|
52 |
+
- **License:** apache-2.0
|
53 |
+
- **Finetuned from model :** LeroyDyer/Mixtral_AI_CyberTron_Ultra
|
54 |
+
|
55 |
+
|
56 |
+
|
57 |
+
|
58 |
+
|
59 |
+
|
60 |
+
|
61 |
+
### OK, it's a great model!
|
62 |
+
|
63 |
+
|
64 |
+
Highly trained on math, as well as on many textbooks and lessons; well fitted to those datasets, and highly tuned on coding datasets too!
|
65 |
+
|
66 |
+
This model has absorbed all its previous generations as well as ALL high performers and specialist models (Mistral). It has absorbed many foreign-language models and still remains an English model!
|
67 |
+
|
68 |
+
Very impressive responses, both short and long, as it was also trained on some binary datasets to return a direct answer, on others to give step-by-step responses, and on others to respond interactively with clients for various tasks, such as product design and system design discussions.
|
69 |
+
|
70 |
+
Financial information and other financial tasks have also been highly tuned. In fact, when returning to previously aligned datasets, they stayed in line and the model was still able to achieve high tuning!
|
71 |
+
Hence the process of merging with a specific topic or role and then training for that role and topic on themed data; hence previous iterations were heavily tuned for medical, law, or role play, as the thinking was that integrating the models into a single entity might even corrupt them, so the decision to separate concerns was taken.
|
72 |
+
This enabled strategic merging and tuning!
|
73 |
+
|
74 |
+
Concepts: chain of thought, function calling, and Self-RAG! Thoughts and emotive responses have been enhanced where possible with the data given. Even sexy books have been highly tuned into the model;
|
75 |
+
but I also think American genre books (sci-fi, fantasy, and romance novels) are required for the great role play which some expect :)
|
76 |
+
I have recently seen a strategy in which prompts can be embedded into the adapter to trigger specific roles.
|
77 |
+
I have tried to replace prompting such as "you are a helpful AI" with a character theme instead, such as "you are a cyber hacker by day and businessman by night", i.e. to give the model various internal personas!
|
78 |
+
After some training I noticed it was also talking to itself (rehearsing)!! But the tokens for thought were missing, so it looked strange until I noticed the bug;
|
79 |
+
After removing the thought tokens, they were displayed in the output, as the tokenizer had been masking them!
|
80 |
+
|
81 |
+
But it is still a great model: given a task-based dataset it converges super quickly, hence my enjoyment of the model, as training it is super quick!
|
82 |
+
Now when I load up datasets, there are generally only a few bad steps before it begins to drop below zero, maintaining a steady 0.6 etc. whilst loading the unseen new dataset, hence not needing so many epochs to adjust the matrix to the new information!
|
83 |
+
|
84 |
+
I'm not sure if LoRAs actually work when you save them, but I do save some and use them to load models for training, as they are jump-starts for models which did not receive that fine-tuning; they can be merged and aligned! (They are probably good!)
|
85 |
+
|
86 |
+
### MOTTO FOR MODEL!
|
87 |
+
|
88 |
+
**Models are the same as LoRAs; take them with light weight, like tablets of knowledge!**
|
89 |
+
|
90 |
+
|
91 |
+
|
92 |
+
|
93 |
+
|
94 |
+
|
95 |
+
|
96 |
+
|
97 |
+
|
98 |
+
|
99 |
+
|
100 |
+
This Mistral model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.
|
101 |
+
|
102 |
+
[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
|
config.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "LeroyDyer/Mixtral_AI_CyberTron_Ultra",
|
3 |
+
"architectures": [
|
4 |
+
"MistralForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 1,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 4096,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 14336,
|
13 |
+
"max_position_embeddings": 32768,
|
14 |
+
"model_type": "mistral",
|
15 |
+
"num_attention_heads": 32,
|
16 |
+
"num_hidden_layers": 32,
|
17 |
+
"num_key_value_heads": 8,
|
18 |
+
"pad_token_id": 2,
|
19 |
+
"rms_norm_eps": 1e-05,
|
20 |
+
"rope_theta": 10000.0,
|
21 |
+
"sliding_window": 4096,
|
22 |
+
"tie_word_embeddings": false,
|
23 |
+
"torch_dtype": "float16",
|
24 |
+
"transformers_version": "4.38.2",
|
25 |
+
"unsloth_version": "2024.4",
|
26 |
+
"use_cache": true,
|
27 |
+
"vocab_size": 32000
|
28 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"pad_token_id": 2,
|
6 |
+
"transformers_version": "4.38.2"
|
7 |
+
}
|
model-00001-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef790fca56022272e05112f600d841a9e7ad1b728af1ba36b31e40a764b2c11e
|
3 |
+
size 1889587008
|
model-00002-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8f77db805de344f3fdc073aa9bef0a57f3109163d066c01fd657fbe8ccbb1a1
|
3 |
+
size 1946243896
|
model-00003-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8d0046bb4bb095df50a99063d578cfce4c341ad56590eebf847b281526c0735
|
3 |
+
size 1979781392
|
model-00004-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3074209aef2dfb940058d4ee841a391b5dae35bdb8024a03cb1aedb4f328b92
|
3 |
+
size 1946243936
|
model-00005-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b8b1f85c54a2ec019f6f42965ae4bd467de4682d5c8d7be781f1a9a54a16e09
|
3 |
+
size 1979781416
|
model-00006-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18d214d88ba4f34978a59fb4e573f3ab5039d641aa3ff137adb30b409441243c
|
3 |
+
size 1946243936
|
model-00007-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3324e89483eb19568958cd74d7959807d5c26356c4d4de330d3a493ad54caa2a
|
3 |
+
size 1979781416
|
model-00008-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d43151c1dc055bd737b17a1aea7a7c32530bb1f8557e1eda53ef7cc824d590a
|
3 |
+
size 815834664
|
model.safetensors.index.json
ADDED
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"total_size": 14483464192
|
4 |
+
},
|
5 |
+
"weight_map": {
|
6 |
+
"lm_head.weight": "model-00008-of-00008.safetensors",
|
7 |
+
"model.embed_tokens.weight": "model-00001-of-00008.safetensors",
|
8 |
+
"model.layers.0.input_layernorm.weight": "model-00001-of-00008.safetensors",
|
9 |
+
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
10 |
+
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
11 |
+
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
12 |
+
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
|
13 |
+
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
|
14 |
+
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
|
15 |
+
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
|
16 |
+
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
|
17 |
+
"model.layers.1.input_layernorm.weight": "model-00001-of-00008.safetensors",
|
18 |
+
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
19 |
+
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
20 |
+
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
21 |
+
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
|
22 |
+
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
|
23 |
+
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
|
24 |
+
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
|
25 |
+
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
|
26 |
+
"model.layers.10.input_layernorm.weight": "model-00003-of-00008.safetensors",
|
27 |
+
"model.layers.10.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
|
28 |
+
"model.layers.10.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
|
29 |
+
"model.layers.10.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
|
30 |
+
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
|
31 |
+
"model.layers.10.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
|
32 |
+
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
|
33 |
+
"model.layers.10.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
|
34 |
+
"model.layers.10.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
|
35 |
+
"model.layers.11.input_layernorm.weight": "model-00003-of-00008.safetensors",
|
36 |
+
"model.layers.11.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
|
37 |
+
"model.layers.11.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
|
38 |
+
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
|
39 |
+
"model.layers.11.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
|
40 |
+
"model.layers.11.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
|
41 |
+
"model.layers.11.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
|
42 |
+
"model.layers.11.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
|
43 |
+
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
|
44 |
+
"model.layers.12.input_layernorm.weight": "model-00004-of-00008.safetensors",
|
45 |
+
"model.layers.12.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
|
46 |
+
"model.layers.12.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
|
47 |
+
"model.layers.12.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
|
48 |
+
"model.layers.12.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
|
49 |
+
"model.layers.12.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
|
50 |
+
"model.layers.12.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
|
51 |
+
"model.layers.12.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
|
52 |
+
"model.layers.12.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
|
53 |
+
"model.layers.13.input_layernorm.weight": "model-00004-of-00008.safetensors",
|
54 |
+
"model.layers.13.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
|
55 |
+
"model.layers.13.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
|
56 |
+
"model.layers.13.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
|
57 |
+
"model.layers.13.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
|
58 |
+
"model.layers.13.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
|
59 |
+
"model.layers.13.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
|
60 |
+
"model.layers.13.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
|
61 |
+
"model.layers.13.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
|
62 |
+
"model.layers.14.input_layernorm.weight": "model-00004-of-00008.safetensors",
|
63 |
+
"model.layers.14.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
|
64 |
+
"model.layers.14.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
|
65 |
+
"model.layers.14.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
|
66 |
+
"model.layers.14.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
|
67 |
+
"model.layers.14.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
|
68 |
+
"model.layers.14.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
|
69 |
+
"model.layers.14.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
|
70 |
+
"model.layers.14.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
|
71 |
+
"model.layers.15.input_layernorm.weight": "model-00004-of-00008.safetensors",
|
72 |
+
"model.layers.15.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
|
73 |
+
"model.layers.15.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
|
74 |
+
"model.layers.15.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
|
75 |
+
"model.layers.15.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
|
76 |
+
"model.layers.15.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
|
77 |
+
"model.layers.15.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
|
78 |
+
"model.layers.15.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
|
79 |
+
"model.layers.15.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
|
80 |
+
"model.layers.16.input_layernorm.weight": "model-00004-of-00008.safetensors",
|
81 |
+
"model.layers.16.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
|
82 |
+
"model.layers.16.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
|
83 |
+
"model.layers.16.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
|
84 |
+
"model.layers.16.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
|
85 |
+
"model.layers.16.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
|
86 |
+
"model.layers.16.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
|
87 |
+
"model.layers.16.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
|
88 |
+
"model.layers.16.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
|
89 |
+
"model.layers.17.input_layernorm.weight": "model-00005-of-00008.safetensors",
|
90 |
+
"model.layers.17.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
|
91 |
+
"model.layers.17.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
|
92 |
+
"model.layers.17.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
|
93 |
+
"model.layers.17.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
|
94 |
+
"model.layers.17.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
|
95 |
+
"model.layers.17.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
|
96 |
+
"model.layers.17.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
|
97 |
+
"model.layers.17.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
|
98 |
+
"model.layers.18.input_layernorm.weight": "model-00005-of-00008.safetensors",
|
99 |
+
"model.layers.18.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
|
100 |
+
"model.layers.18.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
|
101 |
+
"model.layers.18.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
|
102 |
+
"model.layers.18.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
|
103 |
+
"model.layers.18.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
|
104 |
+
"model.layers.18.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
|
105 |
+
"model.layers.18.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
|
106 |
+
"model.layers.18.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
|
107 |
+
"model.layers.19.input_layernorm.weight": "model-00005-of-00008.safetensors",
|
108 |
+
"model.layers.19.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
|
109 |
+
"model.layers.19.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
|
110 |
+
"model.layers.19.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
|
111 |
+
"model.layers.19.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
|
112 |
+
"model.layers.19.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
|
113 |
+
"model.layers.19.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
|
114 |
+
"model.layers.19.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
|
115 |
+
"model.layers.19.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
|
116 |
+
"model.layers.2.input_layernorm.weight": "model-00001-of-00008.safetensors",
|
117 |
+
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
118 |
+
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
119 |
+
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
120 |
+
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
|
121 |
+
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
|
122 |
+
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
|
123 |
+
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
|
124 |
+
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
|
125 |
+
"model.layers.20.input_layernorm.weight": "model-00005-of-00008.safetensors",
|
126 |
+
"model.layers.20.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
|
127 |
+
"model.layers.20.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
|
128 |
+
"model.layers.20.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
|
129 |
+
"model.layers.20.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
|
130 |
+
"model.layers.20.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
|
131 |
+
"model.layers.20.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
|
132 |
+
"model.layers.20.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
|
133 |
+
"model.layers.20.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
|
134 |
+
"model.layers.21.input_layernorm.weight": "model-00006-of-00008.safetensors",
|
135 |
+
"model.layers.21.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
|
136 |
+
"model.layers.21.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
|
137 |
+
"model.layers.21.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
|
138 |
+
"model.layers.21.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
|
139 |
+
"model.layers.21.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
|
140 |
+
"model.layers.21.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
|
141 |
+
"model.layers.21.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
|
142 |
+
"model.layers.21.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
|
143 |
+
"model.layers.22.input_layernorm.weight": "model-00006-of-00008.safetensors",
|
144 |
+
"model.layers.22.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
|
145 |
+
"model.layers.22.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
|
146 |
+
"model.layers.22.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
|
147 |
+
"model.layers.22.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
|
148 |
+
"model.layers.22.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
|
149 |
+
"model.layers.22.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
|
150 |
+
"model.layers.22.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
|
151 |
+
"model.layers.22.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
|
152 |
+
"model.layers.23.input_layernorm.weight": "model-00006-of-00008.safetensors",
|
153 |
+
"model.layers.23.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
|
154 |
+
"model.layers.23.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
|
155 |
+
"model.layers.23.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
|
156 |
+
"model.layers.23.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
|
157 |
+
"model.layers.23.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
|
158 |
+
"model.layers.23.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
|
159 |
+
"model.layers.23.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
|
160 |
+
"model.layers.23.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
|
161 |
+
"model.layers.24.input_layernorm.weight": "model-00006-of-00008.safetensors",
|
162 |
+
"model.layers.24.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
|
163 |
+
"model.layers.24.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
|
164 |
+
"model.layers.24.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
|
165 |
+
"model.layers.24.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
|
166 |
+
"model.layers.24.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
|
167 |
+
"model.layers.24.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
|
168 |
+
"model.layers.24.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
|
169 |
+
"model.layers.24.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
|
170 |
+
"model.layers.25.input_layernorm.weight": "model-00006-of-00008.safetensors",
|
171 |
+
"model.layers.25.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
|
172 |
+
"model.layers.25.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
|
173 |
+
"model.layers.25.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
|
174 |
+
"model.layers.25.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
|
175 |
+
"model.layers.25.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
|
176 |
+
"model.layers.25.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
|
177 |
+
"model.layers.25.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
|
178 |
+
"model.layers.25.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
|
179 |
+
"model.layers.26.input_layernorm.weight": "model-00007-of-00008.safetensors",
|
180 |
+
"model.layers.26.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
|
181 |
+
"model.layers.26.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
|
182 |
+
"model.layers.26.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
|
183 |
+
"model.layers.26.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
|
184 |
+
"model.layers.26.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
|
185 |
+
"model.layers.26.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
|
186 |
+
"model.layers.26.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
|
187 |
+
"model.layers.26.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
|
188 |
+
"model.layers.27.input_layernorm.weight": "model-00007-of-00008.safetensors",
|
189 |
+
"model.layers.27.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
|
190 |
+
"model.layers.27.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
|
191 |
+
"model.layers.27.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
|
192 |
+
"model.layers.27.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
|
193 |
+
"model.layers.27.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
|
194 |
+
"model.layers.27.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
|
195 |
+
"model.layers.27.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
|
196 |
+
"model.layers.27.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
|
197 |
+
"model.layers.28.input_layernorm.weight": "model-00007-of-00008.safetensors",
|
198 |
+
"model.layers.28.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
|
199 |
+
"model.layers.28.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
|
200 |
+
"model.layers.28.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
|
201 |
+
"model.layers.28.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
|
202 |
+
"model.layers.28.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
|
203 |
+
"model.layers.28.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
|
204 |
+
"model.layers.28.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
|
205 |
+
"model.layers.28.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
|
206 |
+
"model.layers.29.input_layernorm.weight": "model-00007-of-00008.safetensors",
|
207 |
+
"model.layers.29.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
|
208 |
+
"model.layers.29.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
|
209 |
+
"model.layers.29.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
|
210 |
+
"model.layers.29.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
|
211 |
+
"model.layers.29.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
|
212 |
+
"model.layers.29.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
|
213 |
+
"model.layers.29.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
|
214 |
+
"model.layers.29.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
|
215 |
+
"model.layers.3.input_layernorm.weight": "model-00002-of-00008.safetensors",
|
216 |
+
"model.layers.3.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
|
217 |
+
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
218 |
+
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
219 |
+
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
|
220 |
+
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
|
221 |
+
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
|
222 |
+
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
|
223 |
+
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
|
224 |
+
"model.layers.30.input_layernorm.weight": "model-00008-of-00008.safetensors",
|
225 |
+
"model.layers.30.mlp.down_proj.weight": "model-00008-of-00008.safetensors",
|
226 |
+
"model.layers.30.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
|
227 |
+
"model.layers.30.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
|
228 |
+
"model.layers.30.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
|
229 |
+
"model.layers.30.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
|
230 |
+
"model.layers.30.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
|
231 |
+
"model.layers.30.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
|
232 |
+
"model.layers.30.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
|
233 |
+
"model.layers.31.input_layernorm.weight": "model-00008-of-00008.safetensors",
|
234 |
+
"model.layers.31.mlp.down_proj.weight": "model-00008-of-00008.safetensors",
|
235 |
+
"model.layers.31.mlp.gate_proj.weight": "model-00008-of-00008.safetensors",
|
236 |
+
"model.layers.31.mlp.up_proj.weight": "model-00008-of-00008.safetensors",
|
237 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
|
238 |
+
"model.layers.31.self_attn.k_proj.weight": "model-00008-of-00008.safetensors",
|
239 |
+
"model.layers.31.self_attn.o_proj.weight": "model-00008-of-00008.safetensors",
|
240 |
+
"model.layers.31.self_attn.q_proj.weight": "model-00008-of-00008.safetensors",
|
241 |
+
"model.layers.31.self_attn.v_proj.weight": "model-00008-of-00008.safetensors",
|
242 |
+
"model.layers.4.input_layernorm.weight": "model-00002-of-00008.safetensors",
|
243 |
+
"model.layers.4.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
|
244 |
+
"model.layers.4.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
|
245 |
+
"model.layers.4.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
|
246 |
+
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
|
247 |
+
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
|
248 |
+
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
|
249 |
+
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
|
250 |
+
"model.layers.4.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
|
251 |
+
"model.layers.5.input_layernorm.weight": "model-00002-of-00008.safetensors",
|
252 |
+
"model.layers.5.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
|
253 |
+
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
|
254 |
+
"model.layers.5.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
|
255 |
+
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
|
256 |
+
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
|
257 |
+
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
|
258 |
+
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
|
259 |
+
"model.layers.5.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
|
260 |
+
"model.layers.6.input_layernorm.weight": "model-00002-of-00008.safetensors",
|
261 |
+
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
|
262 |
+
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
|
263 |
+
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
|
264 |
+
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
|
265 |
+
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
|
266 |
+
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
|
267 |
+
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
|
268 |
+
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
|
269 |
+
"model.layers.7.input_layernorm.weight": "model-00002-of-00008.safetensors",
|
270 |
+
"model.layers.7.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
|
271 |
+
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
|
272 |
+
"model.layers.7.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
|
273 |
+
"model.layers.7.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
|
274 |
+
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
|
275 |
+
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
|
276 |
+
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
|
277 |
+
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
|
278 |
+
"model.layers.8.input_layernorm.weight": "model-00003-of-00008.safetensors",
|
279 |
+
"model.layers.8.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
|
280 |
+
"model.layers.8.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
|
281 |
+
"model.layers.8.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
|
282 |
+
"model.layers.8.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
|
283 |
+
"model.layers.8.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
|
284 |
+
"model.layers.8.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
|
285 |
+
"model.layers.8.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
|
286 |
+
"model.layers.8.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
|
287 |
+
"model.layers.9.input_layernorm.weight": "model-00003-of-00008.safetensors",
|
288 |
+
"model.layers.9.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
|
289 |
+
"model.layers.9.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
|
290 |
+
"model.layers.9.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
|
291 |
+
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
|
292 |
+
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
|
293 |
+
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
|
294 |
+
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
|
295 |
+
"model.layers.9.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
|
296 |
+
"model.norm.weight": "model-00008-of-00008.safetensors"
|
297 |
+
}
|
298 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|im_end|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "<unk>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"unk_token": {
|
24 |
+
"content": "<unk>",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
}
|
30 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc460a0129515b7579ec9f63218012601729de4fbd1b5de8d56dc47e8a204a29
|
3 |
+
size 493449
|
tokenizer_config.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<s>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "<|im_end|>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"bos_token": "<s>",
|
31 |
+
"chat_template": "{% for message in messages %}{% if message['from'] == 'human' %}{{'<|im_start|>user\n' + message['value'] + '<|im_end|>\n'}}{% elif message['from'] == 'gpt' %}{{'<|im_start|>assistant\n' + message['value'] + '<|im_end|>\n' }}{% else %}{{ '<|im_start|>system\n' + message['value'] + '<|im_end|>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
32 |
+
"clean_up_tokenization_spaces": false,
|
33 |
+
"eos_token": "<|im_end|>",
|
34 |
+
"model_max_length": 32768,
|
35 |
+
"pad_token": "<unk>",
|
36 |
+
"padding_side": "left",
|
37 |
+
"tokenizer_class": "LlamaTokenizer",
|
38 |
+
"unk_token": "<unk>",
|
39 |
+
"use_default_system_prompt": false
|
40 |
+
}
|