zhaode commited on
Commit
403d883
1 Parent(s): 9dc548f

Upload folder using huggingface_hub

Browse files
Files changed (9) hide show
  1. .gitattributes +13 -11
  2. README.md +50 -0
  3. config.json +8 -0
  4. embeddings_bf16.bin +3 -0
  5. llm.mnn +3 -0
  6. llm.mnn.json +3 -0
  7. llm.mnn.weight +3 -0
  8. llm_config.json +14 -0
  9. tokenizer.txt +0 -0
.gitattributes CHANGED
@@ -1,35 +1,37 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
11
  *.model filter=lfs diff=lfs merge=lfs -text
12
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
13
  *.onnx filter=lfs diff=lfs merge=lfs -text
14
  *.ot filter=lfs diff=lfs merge=lfs -text
15
  *.parquet filter=lfs diff=lfs merge=lfs -text
16
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
17
  *.pt filter=lfs diff=lfs merge=lfs -text
18
  *.pth filter=lfs diff=lfs merge=lfs -text
19
  *.rar filter=lfs diff=lfs merge=lfs -text
 
20
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
22
  *.tflite filter=lfs diff=lfs merge=lfs -text
23
  *.tgz filter=lfs diff=lfs merge=lfs -text
 
24
  *.xz filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *.tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.db* filter=lfs diff=lfs merge=lfs -text
29
+ *.ark* filter=lfs diff=lfs merge=lfs -text
30
+ **/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
31
+ **/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
32
+ **/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
33
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
34
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
35
+ *.mnn filter=lfs diff=lfs merge=lfs -text
36
+ *.mnn.* filter=lfs diff=lfs merge=lfs -text
37
+ *.weight filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ pipeline_tag: text-generation
6
+ tags:
7
+ - chat
8
+ ---
9
+ # glm-4-9b-chat-MNN
10
+
11
+ ## Introduction
12
+ This model is a 4-bit quantized version of the MNN model exported from [glm-4-9b-chat](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat/summary) using [llmexport](https://github.com/alibaba/MNN/tree/master/transformers/llm/export).
13
+
14
+ ## Download
15
+ ```bash
16
+ # install huggingface
17
+ pip install huggingface
18
+ ```
19
+ ```bash
20
+ # shell download
21
+ huggingface download --model 'taobao-mnn/glm-4-9b-chat-MNN' --local_dir 'path/to/dir'
22
+ ```
23
+ ```python
24
+ # SDK download
25
+ from huggingface_hub import snapshot_download
26
+ model_dir = snapshot_download('taobao-mnn/glm-4-9b-chat-MNN')
27
+ ```
28
+
29
+ ```bash
30
+ # git clone
31
+ git clone https://www.modelscope.cn/taobao-mnn/glm-4-9b-chat-MNN
32
+ ```
33
+
34
+ ## Usage
35
+ ```bash
36
+ # clone MNN source
37
+ git clone https://github.com/alibaba/MNN.git
38
+
39
+ # compile
40
+ cd MNN
41
+ mkdir build && cd build
42
+ cmake .. -DMNN_LOW_MEMORY=true -DMNN_CPU_WEIGHT_DEQUANT_GEMM=true -DMNN_BUILD_LLM=true -DMNN_SUPPORT_TRANSFORMER_FUSE=true
43
+ make -j
44
+
45
+ # run
46
+ ./llm_demo /path/to/glm-4-9b-chat-MNN/config.json prompt.txt
47
+ ```
48
+
49
+ ## Document
50
+ [MNN-LLM](https://mnn-docs.readthedocs.io/en/latest/transformers/llm.html#)
config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "llm_model": "llm.mnn",
3
+ "llm_weight": "llm.mnn.weight",
4
+ "backend_type": "cpu",
5
+ "thread_num": 4,
6
+ "precision": "low",
7
+ "memory": "low"
8
+ }
embeddings_bf16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ca0d5d6c339c24e196542ea5e4f37f7eb472fef8a5169ec474b737f123f977
3
+ size 1241513984
llm.mnn ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de74aae307734f8d8deadb41550038536b18a6ba67ce5031c40f26d4ad425bff
3
+ size 3874704
llm.mnn.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83a658600ae6e9bcbbab4e98ce0d7a6a7eea4250a94303775d6e1ea861db879e
3
+ size 23216816
llm.mnn.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b378b5cd918d6a635cf1327a5e286ff4b9fba34e6f98adc4ca8b51c81aa4c738
3
+ size 4938749114
llm_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hidden_size": 4096,
3
+ "layer_nums": 40,
4
+ "attention_mask": "float",
5
+ "key_value_shape": [
6
+ 2,
7
+ 1,
8
+ 0,
9
+ 2,
10
+ 128
11
+ ],
12
+ "prompt_template": "<|user|>\n%s\n<|assistant|>\n",
13
+ "is_visual": false
14
+ }
tokenizer.txt ADDED
The diff for this file is too large to render. See raw diff