BlackSamorez committed
Commit d24a252
1 Parent(s): 06753b2

Upload LlamaForCausalLM

Files changed (3):
  1. config.json +82 -82
  2. generation_config.json +6 -0
  3. model.safetensors +3 -0
config.json CHANGED
@@ -1,84 +1,84 @@
  {
- "vocab_size": 32000,
- "max_position_embeddings": 2048,
- "hidden_size": 2048,
- "intermediate_size": 5632,
- "num_hidden_layers": 22,
- "num_attention_heads": 32,
- "num_key_value_heads": 4,
- "hidden_act": "silu",
- "initializer_range": 0.02,
- "rms_norm_eps": 1e-05,
- "pretraining_tp": 1,
- "use_cache": true,
- "rope_theta": 10000.0,
- "rope_scaling": null,
- "attention_bias": false,
- "attention_dropout": 0.0,
- "torch_dtype": "float16",
- "tie_word_embeddings": false,
- "architectures": [
- "LlamaForCausalLM"
  ],
- "bos_token_id": 1,
- "eos_token_id": 2,
- "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
- "transformers_version": "4.38.0.dev0",
- "model_type": "llama",
- "quantization_config": {
- "quant_method": "aqlm",
- "nbits_per_codebook": 16,
- "num_codebooks": 1,
- "out_group_size": 1,
- "in_group_size": 8,
- "linear_weights_not_to_quantize": [
- "model.layers.0.input_layernorm.weight",
- "model.layers.0.post_attention_layernorm.weight",
- "model.layers.1.input_layernorm.weight",
- "model.layers.1.post_attention_layernorm.weight",
- "model.layers.2.input_layernorm.weight",
- "model.layers.2.post_attention_layernorm.weight",
- "model.layers.3.input_layernorm.weight",
- "model.layers.3.post_attention_layernorm.weight",
- "model.layers.4.input_layernorm.weight",
- "model.layers.4.post_attention_layernorm.weight",
- "model.layers.5.input_layernorm.weight",
- "model.layers.5.post_attention_layernorm.weight",
- "model.layers.6.input_layernorm.weight",
- "model.layers.6.post_attention_layernorm.weight",
- "model.layers.7.input_layernorm.weight",
- "model.layers.7.post_attention_layernorm.weight",
- "model.layers.8.input_layernorm.weight",
- "model.layers.8.post_attention_layernorm.weight",
- "model.layers.9.input_layernorm.weight",
- "model.layers.9.post_attention_layernorm.weight",
- "model.layers.10.input_layernorm.weight",
- "model.layers.10.post_attention_layernorm.weight",
- "model.layers.11.input_layernorm.weight",
- "model.layers.11.post_attention_layernorm.weight",
- "model.layers.12.input_layernorm.weight",
- "model.layers.12.post_attention_layernorm.weight",
- "model.layers.13.input_layernorm.weight",
- "model.layers.13.post_attention_layernorm.weight",
- "model.layers.14.input_layernorm.weight",
- "model.layers.14.post_attention_layernorm.weight",
- "model.layers.15.input_layernorm.weight",
- "model.layers.15.post_attention_layernorm.weight",
- "model.layers.16.input_layernorm.weight",
- "model.layers.16.post_attention_layernorm.weight",
- "model.layers.17.input_layernorm.weight",
- "model.layers.17.post_attention_layernorm.weight",
- "model.layers.18.input_layernorm.weight",
- "model.layers.18.post_attention_layernorm.weight",
- "model.layers.19.input_layernorm.weight",
- "model.layers.19.post_attention_layernorm.weight",
- "model.layers.20.input_layernorm.weight",
- "model.layers.20.post_attention_layernorm.weight",
- "model.layers.21.input_layernorm.weight",
- "model.layers.21.post_attention_layernorm.weight",
- "model.embed_tokens.weight",
- "model.norm.weight",
- "lm_head.weight"
- ]
- }
- }
 
  {
+ "_name_or_path": "/nfs/scistore14/alistgrp/apanfero/models/TinyLlama-1.1B-Chat-v1_0-AQLM-2Bit-1x16-hf/",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 5632,
+ "max_position_embeddings": 2048,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 22,
+ "num_key_value_heads": 4,
+ "pretraining_tp": 1,
+ "quantization_config": {
+ "in_group_size": 8,
+ "linear_weights_not_to_quantize": [
+ "model.layers.0.input_layernorm.weight",
+ "model.layers.0.post_attention_layernorm.weight",
+ "model.layers.1.input_layernorm.weight",
+ "model.layers.1.post_attention_layernorm.weight",
+ "model.layers.2.input_layernorm.weight",
+ "model.layers.2.post_attention_layernorm.weight",
+ "model.layers.3.input_layernorm.weight",
+ "model.layers.3.post_attention_layernorm.weight",
+ "model.layers.4.input_layernorm.weight",
+ "model.layers.4.post_attention_layernorm.weight",
+ "model.layers.5.input_layernorm.weight",
+ "model.layers.5.post_attention_layernorm.weight",
+ "model.layers.6.input_layernorm.weight",
+ "model.layers.6.post_attention_layernorm.weight",
+ "model.layers.7.input_layernorm.weight",
+ "model.layers.7.post_attention_layernorm.weight",
+ "model.layers.8.input_layernorm.weight",
+ "model.layers.8.post_attention_layernorm.weight",
+ "model.layers.9.input_layernorm.weight",
+ "model.layers.9.post_attention_layernorm.weight",
+ "model.layers.10.input_layernorm.weight",
+ "model.layers.10.post_attention_layernorm.weight",
+ "model.layers.11.input_layernorm.weight",
+ "model.layers.11.post_attention_layernorm.weight",
+ "model.layers.12.input_layernorm.weight",
+ "model.layers.12.post_attention_layernorm.weight",
+ "model.layers.13.input_layernorm.weight",
+ "model.layers.13.post_attention_layernorm.weight",
+ "model.layers.14.input_layernorm.weight",
+ "model.layers.14.post_attention_layernorm.weight",
+ "model.layers.15.input_layernorm.weight",
+ "model.layers.15.post_attention_layernorm.weight",
+ "model.layers.16.input_layernorm.weight",
+ "model.layers.16.post_attention_layernorm.weight",
+ "model.layers.17.input_layernorm.weight",
+ "model.layers.17.post_attention_layernorm.weight",
+ "model.layers.18.input_layernorm.weight",
+ "model.layers.18.post_attention_layernorm.weight",
+ "model.layers.19.input_layernorm.weight",
+ "model.layers.19.post_attention_layernorm.weight",
+ "model.layers.20.input_layernorm.weight",
+ "model.layers.20.post_attention_layernorm.weight",
+ "model.layers.21.input_layernorm.weight",
+ "model.layers.21.post_attention_layernorm.weight",
+ "model.embed_tokens.weight",
+ "model.norm.weight",
+ "lm_head.weight"
  ],
+ "nbits_per_codebook": 16,
+ "num_codebooks": 1,
+ "out_group_size": 1,
+ "quant_method": "aqlm"
+ },
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float16",
+ "transformers_version": "4.38.0.dev0",
+ "use_cache": true,
+ "vocab_size": 32000
+ }
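
For context, a minimal loading sketch for a checkpoint carrying this quantization_config: the "quant_method": "aqlm" entry means from_pretrained dispatches to the AQLM integration, which needs the aqlm package installed (and accelerate for device_map). The repository id below is a placeholder, not a name taken from this commit.

# Sketch only: REPO_ID is a placeholder, not the actual repository name.
from transformers import AutoModelForCausalLM, AutoTokenizer

REPO_ID = "<owner>/<repo>"  # placeholder for this repository's Hub id

# The AQLM settings (quant_method="aqlm", 1 codebook x 16 bits, in_group_size=8)
# are read from config.json, so no extra quantization arguments are passed here.
model = AutoModelForCausalLM.from_pretrained(REPO_ID, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(REPO_ID)

inputs = tokenizer("The capital of France is", return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))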
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.38.0.dev0"
+ }
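
The generation defaults recorded here can be read back with GenerationConfig.from_pretrained; a small sketch, again with a placeholder repository id:

# Sketch: inspect the generation defaults added by this file; the repo id is a placeholder.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("<owner>/<repo>")
print(gen_config.bos_token_id, gen_config.eos_token_id)  # expected: 1 2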
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6bc4d941e02caa14f725916f97f925703efbedf260189c2cbad022c18e237b6a
+ size 666876880
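
The three lines above are a Git LFS pointer rather than the weights themselves; the actual ~667 MB safetensors file is resolved when the repository is cloned or downloaded. A sketch of fetching the file with huggingface_hub and checking it against the sha256 oid recorded in the pointer (the repository id is again a placeholder):

# Sketch: download model.safetensors and verify it against the LFS pointer's oid.
import hashlib

from huggingface_hub import hf_hub_download

path = hf_hub_download(repo_id="<owner>/<repo>", filename="model.safetensors")  # placeholder repo id

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

# Should print True if the downloaded file matches the pointer above.
print(digest.hexdigest() == "6bc4d941e02caa14f725916f97f925703efbedf260189c2cbad022c18e237b6a")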