jojo1899 committed on
Commit
0869f09
1 Parent(s): 0ace247

Improved quantization using OpenVINO 2024.5.0rc1

Browse files
README.md CHANGED
@@ -7,21 +7,20 @@ tags:
7
 
8
  This is an INT4 quantized version of the `meta-llama/Llama-2-13b-chat-hf` model. The Python packages used in creating this model are as follows:
9
  ```
10
- openvino==2024.4.0
11
  optimum==1.23.3
12
  optimum-intel==1.20.1
13
  nncf==2.13.0
14
  torch==2.5.1
15
- transformers==4.46.1
16
  ```
17
  This quantized model is created using the following command:
18
  ```
19
- optimum-cli export openvino -m "meta-llama/Llama-2-13b-chat-hf" --task text-generation-with-past --weight-format int4 --group-size 128 --trust-remote-code ./Llama-2-13b-chat-hf-ov-int4
20
  ```
21
  For more details, run the following command from your Python environment: `optimum-cli export openvino --help`
22
 
23
  INFO:nncf:Statistics of the bitwidth distribution:
24
- | Num bits (N) | % all parameters (layers) | % ratio-defining parameters (layers) |
25
- |--------------|---------------------------|--------------------------------------|
26
- | 8 | 3% (2 / 282) | 0% (0 / 280) |
27
- | 4 | 97% (280 / 282) | 100% (280 / 280) |
 
7
 
8
  This is an INT4 quantized version of the `meta-llama/Llama-2-13b-chat-hf` model. The Python packages used in creating this model are as follows:
9
  ```
10
+ openvino==2024.5.0rc1
11
  optimum==1.23.3
12
  optimum-intel==1.20.1
13
  nncf==2.13.0
14
  torch==2.5.1
15
+ transformers==4.46.2
16
  ```
17
  This quantized model is created using the following command:
18
  ```
19
+ optimum-cli export openvino --model "meta-llama/Llama-2-13b-chat-hf" --weight-format int4 --group-size 128 --sym --ratio 1 --all-layers ./Llama-2-13b-chat-hf-ov-int4
20
  ```
21
  For more details, run the following command from your Python environment: `optimum-cli export openvino --help`
22
 
23
  INFO:nncf:Statistics of the bitwidth distribution:
24
+ | Num bits (N) | % all parameters (layers) | % ratio-defining parameters (layers) |
25
+ |----------------|-----------------------------|----------------------------------------|
26
+ | 4 | 100% (282 / 282) | 100% (282 / 282) |
 
config.json CHANGED
@@ -25,7 +25,7 @@
25
  "rope_theta": 10000.0,
26
  "tie_word_embeddings": false,
27
  "torch_dtype": "float16",
28
- "transformers_version": "4.46.1",
29
  "use_cache": true,
30
  "vocab_size": 32000
31
  }
 
25
  "rope_theta": 10000.0,
26
  "tie_word_embeddings": false,
27
  "torch_dtype": "float16",
28
+ "transformers_version": "4.46.2",
29
  "use_cache": true,
30
  "vocab_size": 32000
31
  }
generation_config.json CHANGED
@@ -6,5 +6,5 @@
6
  "pad_token_id": 0,
7
  "temperature": 0.6,
8
  "top_p": 0.9,
9
- "transformers_version": "4.46.1"
10
  }
 
6
  "pad_token_id": 0,
7
  "temperature": 0.6,
8
  "top_p": 0.9,
9
+ "transformers_version": "4.46.2"
10
  }
openvino_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:064cdc248bbfdd396d26d976af4f74f2f9ea4dfe01a00df1128df88a0a9dd7c8
3
- size 6921224160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4cfe288e8970a7ae85cff20ba84bb4295446873a212d529919888a28cb8c394
3
+ size 6712750576
openvino_model.xml CHANGED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723