jojo1899 committed
Commit 397e787 · verified · 1 Parent(s): 85c391d

Quantized using nncf 2.13.0

README.md CHANGED
@@ -7,14 +7,12 @@ tags:
 
 This is an INT4 quantized version of the `meta-llama/Llama-2-13b-chat-hf` model. The Python packages used in creating this model are as follows:
 ```
-openvino==2024.3.0.dev20240528
-openvino-nightly==2024.3.0.dev20240528
-openvino-tokenizers==2024.3.0.0.dev20240528
-optimum==1.19.2
-optimum-intel==1.17.0.dev0+aefabf0
-nncf==2.11.0.dev0+90a7f0d5
-torch==2.3.0+cu121
-transformers==4.40.2
+openvino==2024.4.0
+optimum==1.23.3
+optimum-intel==1.20.1
+nncf==2.13.0
+torch==2.5.1
+transformers==4.46.1
 ```
 This quantized model is created using the following command:
 ```
@@ -25,5 +23,5 @@ For more details, run the following command from your Python environment: `optim
 INFO:nncf:Statistics of the bitwidth distribution:
 | Num bits (N) | % all parameters (layers) | % ratio-defining parameters (layers) |
 |--------------|---------------------------|--------------------------------------|
-| 8 | 22% (83 / 282) | 20% (81 / 280) |
-| 4 | 78% (199 / 282) | 80% (199 / 280) |
+| 8 | 3% (2 / 282) | 0% (0 / 280) |
+| 4 | 97% (280 / 282) | 100% (280 / 280) |
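Note: the actual quantization command sits outside the hunk context above and is not reproduced here. As a minimal sketch only, an equivalent INT4 weight-only compression with the new package set (optimum-intel 1.20.1, nncf 2.13.0) could look like the following; `ratio=1.0` mirrors the statistics table (100% of ratio-defining layers at 4 bits), while `group_size=128` and the output directory name are illustrative assumptions, not values recorded in this commit.

```
# Minimal sketch; NOT the exact (elided) command used for this commit.
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# ratio=1.0 follows the stats table above (280/280 ratio-defining layers at 4 bits);
# group_size=128 is an assumed value, not confirmed by this commit.
quant_config = OVWeightQuantizationConfig(bits=4, ratio=1.0, group_size=128)

model = OVModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-13b-chat-hf",
    export=True,                       # convert the PyTorch model to OpenVINO IR
    quantization_config=quant_config,  # weight-only INT4 compression via nncf
)
model.save_pretrained("llama-2-13b-chat-ov-int4")  # hypothetical output dir
```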
config.json CHANGED
@@ -1,4 +1,5 @@
 {
+  "_attn_implementation_autoset": true,
   "_name_or_path": "meta-llama/Llama-2-13b-chat-hf",
   "architectures": [
     "LlamaForCausalLM"
@@ -7,11 +8,13 @@
   "attention_dropout": 0.0,
   "bos_token_id": 1,
   "eos_token_id": 2,
+  "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 5120,
   "initializer_range": 0.02,
   "intermediate_size": 13824,
   "max_position_embeddings": 4096,
+  "mlp_bias": false,
   "model_type": "llama",
   "num_attention_heads": 40,
   "num_hidden_layers": 40,
@@ -21,7 +24,8 @@
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
-  "transformers_version": "4.40.2",
+  "torch_dtype": "float16",
+  "transformers_version": "4.46.1",
   "use_cache": true,
   "vocab_size": 32000
 }
generation_config.json CHANGED
@@ -6,5 +6,5 @@
   "pad_token_id": 0,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.40.2"
+  "transformers_version": "4.46.1"
 }
openvino_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96be0292e81bca0757ad3e9bbfb936ad92cd10a3404e4022b42e029cf23c4883
-size 8157059168
+oid sha256:064cdc248bbfdd396d26d976af4f74f2f9ea4dfe01a00df1128df88a0a9dd7c8
+size 6921224160
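The new weight file is 6,921,224,160 bytes (~6.9 GB), down from ~8.2 GB, which is roughly consistent with 13B parameters at 4 bits each (~6.5 GB) plus quantization scales and the two remaining 8-bit layers. A minimal sketch of consuming the IR pair (openvino_model.xml + openvino_model.bin) with optimum-intel, assuming a hypothetical local checkout at ./llama-2-13b-chat-ov-int4:

```
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

# Hypothetical path to a local checkout of this repository.
model_dir = "./llama-2-13b-chat-ov-int4"

tokenizer = AutoTokenizer.from_pretrained(model_dir)
# from_pretrained reads openvino_model.xml/.bin directly; no export step is
# needed because the IR is already INT4-compressed.
model = OVModelForCausalLM.from_pretrained(model_dir)

inputs = tokenizer("What is INT4 weight-only quantization?", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```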
openvino_model.xml CHANGED
The diff for this file is too large to render.
 
tokenizer.json CHANGED
The diff for this file is too large to render.
 
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
 {
   "add_bos_token": true,
   "add_eos_token": false,
+  "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",