Dracones committed on
Commit ffba72d
1 Parent(s): ac11b08

Upload folder using huggingface_hub

README.md ADDED
@@ -0,0 +1,136 @@
+ ---
+ language:
+ - en
+ license: apache-2.0
+ base_model: microsoft/WizardLM-2-8x22B
+ tags:
+ - exl2
+ ---
+
+ # WizardLM-2-8x22B - EXL2 3.75bpw
+
+ This is a 3.75bpw EXL2 quant of [microsoft/WizardLM-2-8x22B](https://huggingface.co/microsoft/WizardLM-2-8x22B).
+
+ Details about the model itself can be found on the model page linked above.
+
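+ As a quick reference, this quant can be fetched with `huggingface-cli`. A minimal sketch; the repo id is assumed from the `Dracones/${MODEL_NAME}_exl2_${BPW}bpw` naming convention used by the scripts below:
+
+ ```bash
+ # Download the 3.75bpw quant into a local folder.
+ # Repo id assumed from the naming convention in this card's scripts.
+ huggingface-cli download Dracones/WizardLM-2-8x22B_exl2_3.75bpw \
+   --local-dir ./WizardLM-2-8x22B_exl2_3.75bpw
+ ```
+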
+ ## EXL2 Version
+
+ These quants were made with exllamav2 version 0.0.18. Quants made with this version may not work on older versions of the exllamav2 library.
+
+ If you have problems loading these models, please update Text Generation WebUI to the latest version.
+
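+ For a quick smoke test outside of a front end, exllamav2's bundled `test_inference.py` (the same script used for the perplexity runs below) can generate from a prompt. A minimal sketch, assuming that script's `-p` prompt flag and a local copy of this quant; the `-gs` values depend on your GPUs:
+
+ ```bash
+ # Generate a short completion from the quantized model.
+ # -m: local quant folder; -p: prompt (assumed flag of test_inference.py);
+ # -gs: GB of weights to place on each GPU.
+ python test_inference.py -m ./WizardLM-2-8x22B_exl2_3.75bpw \
+   -p "Write a haiku about quantization." -gs 40,40,40,40
+ ```
+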
+ ## Perplexity Scoring
+
+ Below are the perplexity scores for the EXL2 models. A lower score is better.
+
+ | Quant Level | Perplexity Score |
+ |-------------|------------------|
+ | 7.0 | 4.5859 |
+ | 6.0 | 4.6252 |
+ | 5.5 | 4.6493 |
+ | 5.0 | 4.6937 |
+ | 4.5 | 4.8029 |
+ | 4.0 | 4.9372 |
+ | 3.5 | 5.1336 |
+ | 3.25 | 5.3636 |
+ | 3.0 | 5.5468 |
+ | 2.75 | 5.8255 |
+ | 2.5 | 6.3362 |
+ | 2.25 | 7.7763 |
+
+
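+ For reference, the perplexity reported here is the exponentiated average negative log-likelihood of the evaluation text over its tokens, so lower is better:
+
+ $$\mathrm{PPL} = \exp\!\left(-\frac{1}{N}\sum_{i=1}^{N}\log p(x_i \mid x_{<i})\right)$$
+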
+ ### Perplexity Script
+
+ This was the script used for perplexity testing.
+
+ ```bash
+ #!/bin/bash
+
+ # Activate the conda environment
+ source ~/miniconda3/etc/profile.d/conda.sh
+ conda activate exllamav2
+
+ # Evaluation dataset (wikitext-2)
+ DATA_SET=/root/wikitext/wikitext-2-v1.parquet
+
+ # Set the model name and the bit sizes to test
+ MODEL_NAME="WizardLM-2-8x22B"
+ BIT_PRECISIONS=(6.0 5.5 5.0 4.5 4.0 3.5 3.25 3.0 2.75 2.5 2.25)
+
+ # Print the markdown table header
+ echo "| Quant Level | Perplexity Score |"
+ echo "|-------------|------------------|"
+
+ for BIT_PRECISION in "${BIT_PRECISIONS[@]}"
+ do
+   LOCAL_FOLDER="/root/models/${MODEL_NAME}_exl2_${BIT_PRECISION}bpw"
+   REMOTE_FOLDER="Dracones/${MODEL_NAME}_exl2_${BIT_PRECISION}bpw"
+
+   # Download the quant from the Hub if it isn't already present locally
+   if [ ! -d "$LOCAL_FOLDER" ]; then
+     huggingface-cli download --local-dir-use-symlinks=False --local-dir "${LOCAL_FOLDER}" "${REMOTE_FOLDER}" >> /root/download.log 2>&1
+   fi
+
+   # -gs places ~40 GB of weights on each of four GPUs; -ed runs the
+   # perplexity evaluation over the dataset
+   output=$(python test_inference.py -m "$LOCAL_FOLDER" -gs 40,40,40,40 -ed "$DATA_SET")
+   score=$(echo "$output" | grep -oP 'Evaluation perplexity: \K[\d.]+')
+   echo "| $BIT_PRECISION | $score |"
+   # rm -rf "${LOCAL_FOLDER}"
+ done
+ ```
+
+
+ ## Quant Details
+
+ This is the script used for quantization.
+
+ ```bash
+ #!/bin/bash
+
+ # Activate the conda environment
+ source ~/miniconda3/etc/profile.d/conda.sh
+ conda activate exllamav2
+
+ # Set the model name
+ MODEL_NAME="WizardLM-2-8x22B"
+
+ # Define variables
+ MODEL_DIR="/mnt/storage/models/$MODEL_NAME"
+ OUTPUT_DIR="exl2_$MODEL_NAME"
+ MEASUREMENT_FILE="measurements/$MODEL_NAME.json"
+
+ # Create the measurement file if needed
+ if [ ! -f "$MEASUREMENT_FILE" ]; then
+   echo "Creating $MEASUREMENT_FILE"
+   # Start from a clean working directory
+   if [ -d "$OUTPUT_DIR" ]; then
+     rm -r "$OUTPUT_DIR"
+   fi
+   mkdir "$OUTPUT_DIR"
+
+   # -nr starts a fresh job; -om saves the measurement pass to a file
+   python convert.py -i "$MODEL_DIR" -o "$OUTPUT_DIR" -nr -om "$MEASUREMENT_FILE"
+ fi
+
+ # Choose one of the below: either create a single quant for testing or a batch of them.
+ # BIT_PRECISIONS=(2.25)
+ BIT_PRECISIONS=(5.0 4.5 4.0 3.5 3.0 2.75 2.5 2.25)
+
+ for BIT_PRECISION in "${BIT_PRECISIONS[@]}"
+ do
+   CONVERTED_FOLDER="models/${MODEL_NAME}_exl2_${BIT_PRECISION}bpw"
+
+   # If it doesn't already exist, make the quant
+   if [ ! -d "$CONVERTED_FOLDER" ]; then
+     echo "Creating $CONVERTED_FOLDER"
+
+     # Start from a clean working directory
+     if [ -d "$OUTPUT_DIR" ]; then
+       rm -r "$OUTPUT_DIR"
+     fi
+     mkdir "$OUTPUT_DIR"
+     mkdir "$CONVERTED_FOLDER"
+
+     # Reuse the measurement file (-m), quantize to the target bit rate (-b)
+     # and write the finished quant to the output folder (-cf)
+     python convert.py -i "$MODEL_DIR" -o "$OUTPUT_DIR" -nr -m "$MEASUREMENT_FILE" -b "$BIT_PRECISION" -cf "$CONVERTED_FOLDER"
+   fi
+ done
+ ```
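+
+ The measurement pass is the expensive step, so it runs once up front (saved with `-om`) and is then reused via `-m` for every bit rate in the batch. The script leaves `convert.py`'s head-layer precision at its default, which is what the `head_bits: 6` entry recorded in this repo's `config.json` appears to reflect.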
config.json ADDED
@@ -0,0 +1,42 @@
+ {
+   "_name_or_path": "",
+   "architectures": [
+     "MixtralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 6144,
+   "initializer_range": 0.02,
+   "intermediate_size": 16384,
+   "max_position_embeddings": 65536,
+   "model_type": "mixtral",
+   "num_attention_heads": 48,
+   "num_experts_per_tok": 2,
+   "num_hidden_layers": 56,
+   "num_key_value_heads": 8,
+   "num_local_experts": 8,
+   "output_router_logits": false,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 1000000,
+   "router_aux_loss_coef": 0.001,
+   "router_jitter_noise": 0.0,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.36.2",
+   "use_cache": false,
+   "vocab_size": 32000,
+   "quantization_config": {
+     "quant_method": "exl2",
+     "version": "0.0.18",
+     "bits": 3.75,
+     "head_bits": 6,
+     "calibration": {
+       "rows": 100,
+       "length": 2048,
+       "dataset": "(default)"
+     }
+   }
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "transformers_version": "4.36.2"
+ }
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
output-00001-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fe437c9a17226122285cf7d9c88f6d6502d2c2e19ccf5babf595deaf13e1efc2
+ size 8576875672
output-00002-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:50cd2e1c818ac8538e98ae3f2c6891982bbff023edd376b7aa59fcc7cb71e0b7
+ size 8556181288
output-00003-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:25be19428f051798afabf6a1afd2ee57764cacc807e89d4bbff93746dd3adf58
+ size 8542081272
output-00004-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e66e18c2dfc3cecda1bf21f9b9d6abe306eae775b1e5fd88c3b407f23e025343
+ size 8561197984
output-00005-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0511ad5eae0f8cf2bd4bffc8012d64505ce56baa581462126f1849035fff48da
+ size 8587221576
output-00006-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5d5fe20e14d62fcfe28fed84488beab0c50a921e2972ac7bb70bf7950bf25120
+ size 8580773160
output-00007-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dda609eaac0d8120ef4fcfe04cf65cfff88b627f69b5fdb4b71894aca0ec2681
+ size 8580060880
output-00008-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2da89a9f06272fdfa37e712326645f4d89d4c8a89be7db29e079ee8f672477f9
+ size 6302303448
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<unk>",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<unk>",
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }