mofosyne committed
Commit
aec30a9
1 Parent(s): eac5a32

Update to use the new metadata override feature that has already been merged in

.args CHANGED
@@ -1,2 +1,2 @@
  -m
- Tinyllama-5M-v0.2-F16.gguf
+ Maykeye_Tinyllama-4.6M-v0.0-F16.gguf
Tinyllama-5M-v0.2-F16.gguf → Maykeye_Tinyllama-4.6M-v0.0-F16.gguf RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e0422a7e84b0d8a6ebc77513ec4fe53979850b9f6235265eba76e3e954072f86
- size 10008256
+ oid sha256:994dbfb58bbb955f4b8582007785e76572421bb9c5d095c8005cc09f739d8a70
+ size 10008672
Tinyllama-5M-v0.2-F16.llamafile → Maykeye_Tinyllama-4.6M-v0.0-F16.llamafile RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0098e4ca8bbf84474fc65105d9149047d73ad964481182b954efae4ab5a9bfe9
- size 19281727
+ oid sha256:b97e3d6892bb4f08c058c1e37224bf254f7914c860320c646f69d06988545a0c
+ size 21509971
Maykeye_Tinyllama-4.6M-v0.0-F16.md ADDED
@@ -0,0 +1,300 @@
1
+ # Maykeye_Tinyllama-4.6M-v0.0-F16.gguf - GGUF Internal File Dump
2
+
3
+ - Endian: LITTLE endian
4
+
5
+ ## Key Value Metadata Store
6
+
7
+ There are 36 key-value pairs in this file
8
+
9
+ | POS | TYPE | Count | Key | Value |
10
+ |----:|:----------|------:|:---------------------------------------|:---------------------------------------------------------------------------------|
11
+ | 1 | UINT32 | 1 | GGUF.version | 3 |
12
+ | 2 | UINT64 | 1 | GGUF.tensor_count | 75 |
13
+ | 3 | UINT64 | 1 | GGUF.kv_count | 33 |
14
+ | 4 | STRING | 1 | general.architecture | 'llama' |
15
+ | 5 | STRING | 1 | general.type | 'model' |
16
+ | 6 | STRING | 1 | general.name | 'Maykeye_Tinyllama' |
17
+ | 7 | STRING | 1 | general.author | 'Maykeye' |
18
+ | 8 | STRING | 1 | general.version | 'v0.0' |
19
+ | 9 | STRING | 1 | general.description | 'This gguf is ported from a first version of Maykeye attempt ' |
20
+ | 10 | STRING | 1 | general.quantized_by | 'Mofosyne' |
21
+ | 11 | STRING | 1 | general.size_label | '4.6M' |
22
+ | 12 | STRING | 1 | general.license | 'apache-2.0' |
23
+ | 13 | STRING | 1 | general.url | 'https://huggingface.co/mofosyne/TinyLLama-v0-llamafile' |
24
+ | 14 | STRING | 1 | general.source.url | 'https://huggingface.co/Maykeye/TinyLLama-v0' |
25
+ | 15 | [STRING] | 5 | general.tags | [ 'tiny ', '\n\x00\x00\x00\x00', 'tiny', '\x04\x00\x00\x00\x00', 'llama', ... ] |
26
+ | 16 | [STRING] | 1 | general.languages | [ 'en' ] |
27
+ | 17 | [STRING] | 2 | general.datasets | [ 'https', ']\x00\x00\x00\x00', ... ] |
28
+ | 18 | UINT32 | 1 | llama.block_count | 8 |
29
+ | 19 | UINT32 | 1 | llama.context_length | 2048 |
30
+ | 20 | UINT32 | 1 | llama.embedding_length | 64 |
31
+ | 21 | UINT32 | 1 | llama.feed_forward_length | 256 |
32
+ | 22 | UINT32 | 1 | llama.attention.head_count | 16 |
33
+ | 23 | FLOAT32 | 1 | llama.attention.layer_norm_rms_epsilon | 1e-06 |
34
+ | 24 | UINT32 | 1 | general.file_type | 1 |
35
+ | 25 | UINT32 | 1 | llama.vocab_size | 32000 |
36
+ | 26 | UINT32 | 1 | llama.rope.dimension_count | 4 |
37
+ | 27 | STRING | 1 | tokenizer.ggml.model | 'llama' |
38
+ | 28 | STRING | 1 | tokenizer.ggml.pre | 'default' |
39
+ | 29 | [STRING] | 32000 | tokenizer.ggml.tokens | [ 'А', '\x02\x00\x00\x00\x00', 'š', '\x02\x00\x00\x00\x00', 'α', ... ] |
40
+ | 30 | [FLOAT32] | 32000 | tokenizer.ggml.scores | [ -31740.0, -31739.0, -31738.0, -31737.0, -31736.0, -31735.0, -31734.0, ... ] |
41
+ | 31 | [INT32] | 32000 | tokenizer.ggml.token_type | [ 1, 1, 1, 1, 1, 1, 1, ... ] |
42
+ | 32 | UINT32 | 1 | tokenizer.ggml.bos_token_id | 1 |
43
+ | 33 | UINT32 | 1 | tokenizer.ggml.eos_token_id | 2 |
44
+ | 34 | UINT32 | 1 | tokenizer.ggml.unknown_token_id | 0 |
45
+ | 35 | UINT32 | 1 | tokenizer.ggml.padding_token_id | 0 |
46
+ | 36 | UINT32 | 1 | general.quantization_version | 2 |
47
+
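The key-value store above can be read programmatically with the `gguf` Python package that ships inside the llama.cpp submodule (`llama.cpp/gguf-py`), which is also what `gguf_dump.py --markdown` uses to generate this document. A minimal sketch, assuming that package is importable and that the GGUF file produced by this commit is in the current directory:

```python
# Minimal sketch: walk the key-value metadata of the GGUF file dumped above.
# Assumes gguf-py from the llama.cpp submodule is installed, e.g.
#   pip install ./llama.cpp/gguf-py
from gguf import GGUFReader

reader = GGUFReader("Maykeye_Tinyllama-4.6M-v0.0-F16.gguf")

# reader.fields preserves on-disk order, so the enumeration mirrors the POS column.
for pos, (key, field) in enumerate(reader.fields.items(), start=1):
    types = ",".join(t.name for t in field.types)
    print(f"{pos:>3} | {types:<9} | {key}")
```
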
48
+ ## Tensors Overview ~5M Elements
49
+
50
+ Total number of elements in all tensors: 4621376 Elements
51
+
52
+ - [Base Tensor Group - ~4M Elements](#base)
53
+ - [Block 0 Tensor Group - ~66K Elements](#blk_0)
54
+ - [Block 1 Tensor Group - ~66K Elements](#blk_1)
55
+ - [Block 2 Tensor Group - ~66K Elements](#blk_2)
56
+ - [Block 3 Tensor Group - ~66K Elements](#blk_3)
57
+ - [Block 4 Tensor Group - ~66K Elements](#blk_4)
58
+ - [Block 5 Tensor Group - ~66K Elements](#blk_5)
59
+ - [Block 6 Tensor Group - ~66K Elements](#blk_6)
60
+ - [Block 7 Tensor Group - ~66K Elements](#blk_7)
61
+
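The per-block and total element counts quoted in this overview can be reproduced from the tensor shapes listed in the group tables below; a quick arithmetic check:

```python
# Worked check of the element counts quoted in this overview.
per_block = 2 * 64 + 3 * 16384 + 4 * 4096   # 2 norm vectors + 3 FFN matrices + 4 attention matrices
base      = 2 * 2048000 + 64                # output.weight, token_embd.weight, output_norm.weight
total     = base + 8 * per_block
print(per_block, base, total)               # 65664 4096064 4621376
```
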
62
+ ### Tensor Data Offset
63
+
64
+ This table contains each tensor's data offset and data segment size, relative to the start of the file
65
+
66
+ | T_ID | Tensor Layer Name | Data Offset (B) | Data Size (B) |
67
+ |-----:|:-------------------------|-----------------:|-----------------:|
68
+ | 0 | output.weight | 0xba760 | 0x3e8000 |
69
+ | 1 | token_embd.weight | 0x4a2760 | 0x3e8000 |
70
+ | 2 | blk.0.attn_norm.weight | 0x88a760 | 0x100 |
71
+ | 3 | blk.0.ffn_down.weight | 0x88a860 | 0x8000 |
72
+ | 4 | blk.0.ffn_gate.weight | 0x892860 | 0x8000 |
73
+ | 5 | blk.0.ffn_up.weight | 0x89a860 | 0x8000 |
74
+ | 6 | blk.0.ffn_norm.weight | 0x8a2860 | 0x100 |
75
+ | 7 | blk.0.attn_k.weight | 0x8a2960 | 0x2000 |
76
+ | 8 | blk.0.attn_output.weight | 0x8a4960 | 0x2000 |
77
+ | 9 | blk.0.attn_q.weight | 0x8a6960 | 0x2000 |
78
+ | 10 | blk.0.attn_v.weight | 0x8a8960 | 0x2000 |
79
+ | 11 | blk.1.attn_norm.weight | 0x8aa960 | 0x100 |
80
+ | 12 | blk.1.ffn_down.weight | 0x8aaa60 | 0x8000 |
81
+ | 13 | blk.1.ffn_gate.weight | 0x8b2a60 | 0x8000 |
82
+ | 14 | blk.1.ffn_up.weight | 0x8baa60 | 0x8000 |
83
+ | 15 | blk.1.ffn_norm.weight | 0x8c2a60 | 0x100 |
84
+ | 16 | blk.1.attn_k.weight | 0x8c2b60 | 0x2000 |
85
+ | 17 | blk.1.attn_output.weight | 0x8c4b60 | 0x2000 |
86
+ | 18 | blk.1.attn_q.weight | 0x8c6b60 | 0x2000 |
87
+ | 19 | blk.1.attn_v.weight | 0x8c8b60 | 0x2000 |
88
+ | 20 | blk.2.attn_norm.weight | 0x8cab60 | 0x100 |
89
+ | 21 | blk.2.ffn_down.weight | 0x8cac60 | 0x8000 |
90
+ | 22 | blk.2.ffn_gate.weight | 0x8d2c60 | 0x8000 |
91
+ | 23 | blk.2.ffn_up.weight | 0x8dac60 | 0x8000 |
92
+ | 24 | blk.2.ffn_norm.weight | 0x8e2c60 | 0x100 |
93
+ | 25 | blk.2.attn_k.weight | 0x8e2d60 | 0x2000 |
94
+ | 26 | blk.2.attn_output.weight | 0x8e4d60 | 0x2000 |
95
+ | 27 | blk.2.attn_q.weight | 0x8e6d60 | 0x2000 |
96
+ | 28 | blk.2.attn_v.weight | 0x8e8d60 | 0x2000 |
97
+ | 29 | blk.3.attn_norm.weight | 0x8ead60 | 0x100 |
98
+ | 30 | blk.3.ffn_down.weight | 0x8eae60 | 0x8000 |
99
+ | 31 | blk.3.ffn_gate.weight | 0x8f2e60 | 0x8000 |
100
+ | 32 | blk.3.ffn_up.weight | 0x8fae60 | 0x8000 |
101
+ | 33 | blk.3.ffn_norm.weight | 0x902e60 | 0x100 |
102
+ | 34 | blk.3.attn_k.weight | 0x902f60 | 0x2000 |
103
+ | 35 | blk.3.attn_output.weight | 0x904f60 | 0x2000 |
104
+ | 36 | blk.3.attn_q.weight | 0x906f60 | 0x2000 |
105
+ | 37 | blk.3.attn_v.weight | 0x908f60 | 0x2000 |
106
+ | 38 | blk.4.attn_norm.weight | 0x90af60 | 0x100 |
107
+ | 39 | blk.4.ffn_down.weight | 0x90b060 | 0x8000 |
108
+ | 40 | blk.4.ffn_gate.weight | 0x913060 | 0x8000 |
109
+ | 41 | blk.4.ffn_up.weight | 0x91b060 | 0x8000 |
110
+ | 42 | blk.4.ffn_norm.weight | 0x923060 | 0x100 |
111
+ | 43 | blk.4.attn_k.weight | 0x923160 | 0x2000 |
112
+ | 44 | blk.4.attn_output.weight | 0x925160 | 0x2000 |
113
+ | 45 | blk.4.attn_q.weight | 0x927160 | 0x2000 |
114
+ | 46 | blk.4.attn_v.weight | 0x929160 | 0x2000 |
115
+ | 47 | blk.5.attn_norm.weight | 0x92b160 | 0x100 |
116
+ | 48 | blk.5.ffn_down.weight | 0x92b260 | 0x8000 |
117
+ | 49 | blk.5.ffn_gate.weight | 0x933260 | 0x8000 |
118
+ | 50 | blk.5.ffn_up.weight | 0x93b260 | 0x8000 |
119
+ | 51 | blk.5.ffn_norm.weight | 0x943260 | 0x100 |
120
+ | 52 | blk.5.attn_k.weight | 0x943360 | 0x2000 |
121
+ | 53 | blk.5.attn_output.weight | 0x945360 | 0x2000 |
122
+ | 54 | blk.5.attn_q.weight | 0x947360 | 0x2000 |
123
+ | 55 | blk.5.attn_v.weight | 0x949360 | 0x2000 |
124
+ | 56 | blk.6.attn_norm.weight | 0x94b360 | 0x100 |
125
+ | 57 | blk.6.ffn_down.weight | 0x94b460 | 0x8000 |
126
+ | 58 | blk.6.ffn_gate.weight | 0x953460 | 0x8000 |
127
+ | 59 | blk.6.ffn_up.weight | 0x95b460 | 0x8000 |
128
+ | 60 | blk.6.ffn_norm.weight | 0x963460 | 0x100 |
129
+ | 61 | blk.6.attn_k.weight | 0x963560 | 0x2000 |
130
+ | 62 | blk.6.attn_output.weight | 0x965560 | 0x2000 |
131
+ | 63 | blk.6.attn_q.weight | 0x967560 | 0x2000 |
132
+ | 64 | blk.6.attn_v.weight | 0x969560 | 0x2000 |
133
+ | 65 | blk.7.attn_norm.weight | 0x96b560 | 0x100 |
134
+ | 66 | blk.7.ffn_down.weight | 0x96b660 | 0x8000 |
135
+ | 67 | blk.7.ffn_gate.weight | 0x973660 | 0x8000 |
136
+ | 68 | blk.7.ffn_up.weight | 0x97b660 | 0x8000 |
137
+ | 69 | blk.7.ffn_norm.weight | 0x983660 | 0x100 |
138
+ | 70 | blk.7.attn_k.weight | 0x983760 | 0x2000 |
139
+ | 71 | blk.7.attn_output.weight | 0x985760 | 0x2000 |
140
+ | 72 | blk.7.attn_q.weight | 0x987760 | 0x2000 |
141
+ | 73 | blk.7.attn_v.weight | 0x989760 | 0x2000 |
142
+ | 74 | output_norm.weight | 0x98b760 | 0x100 |
143
+
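The Data Size column is simply the element count times the element width (2 bytes for F16, 4 bytes for F32); for example `output.weight` is 2048000 x 2 = 4096000 bytes = 0x3e8000. A small sketch that recomputes the column with the same `gguf` package as above (attribute names follow the gguf-py `ReaderTensor` interface as I understand it):

```python
# Sketch: recompute each tensor's byte size from its element count and type
# and compare it with the reader's own figure (the Data Size column above).
from gguf import GGUFReader

reader = GGUFReader("Maykeye_Tinyllama-4.6M-v0.0-F16.gguf")

WIDTH = {"F16": 2, "F32": 4}  # only these two types appear in this file

for t in reader.tensors:
    expected = int(t.n_elements) * WIDTH[t.tensor_type.name]
    print(f"{t.name:26} offset=0x{t.data_offset:x} "
          f"size=0x{t.n_bytes:x} expected=0x{expected:x}")
```
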
144
+ ### <a name="base">Base Tensor Group : ~4M Elements</a>
145
+
146
+ | T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type |
147
+ |-----:|:-------------------|:---------------------------------|:--------------|:-------------------|:-----|
148
+ | 0 | output.weight | Output (W) | (~2M) 2048000 | 64 x 32000 x 1 x 1 | F16 |
149
+ | 1 | token_embd.weight | Token Embedding (W) | (~2M) 2048000 | 64 x 32000 x 1 x 1 | F16 |
150
+ | 74 | output_norm.weight | Output Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
151
+
152
+ - Total elements in base: ( ~4M) 4096064
153
+ - Percentage of total elements: 88.63%
154
+
155
+
156
+ ### <a name="blk_0">Block 0 Tensor Group : ~66K Elements</a>
157
+
158
+ | T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type |
159
+ |-----:|:-------------------------|:-----------------------------------------------|:-------------|:------------------|:-----|
160
+ | 2 | blk.0.attn_norm.weight | Block 0 Attention Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
161
+ | 3 | blk.0.ffn_down.weight | Block 0 Feed-Forward Network "Down" (W) | (~16K) 16384 | 256 x 64 x 1 x 1 | F16 |
162
+ | 4 | blk.0.ffn_gate.weight | Block 0 Feed-Forward Network "Gate" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
163
+ | 5 | blk.0.ffn_up.weight | Block 0 Feed-Forward Network "Up" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
164
+ | 6 | blk.0.ffn_norm.weight | Block 0 Feed-Forward Network Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
165
+ | 7 | blk.0.attn_k.weight | Block 0 Attention Key (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
166
+ | 8 | blk.0.attn_output.weight | Block 0 Attention Output (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
167
+ | 9 | blk.0.attn_q.weight | Block 0 Attention Query (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
168
+ | 10 | blk.0.attn_v.weight | Block 0 Attention Value (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
169
+
170
+ - Total elements in blk.0: (~66K) 65664
171
+ - Percentage of total elements: 1.42%
172
+
173
+
174
+ ### <a name="blk_1">Block 1 Tensor Group : ~66K Elements</a>
175
+
176
+ | T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type |
177
+ |-----:|:-------------------------|:-----------------------------------------------|:-------------|:------------------|:-----|
178
+ | 11 | blk.1.attn_norm.weight | Block 1 Attention Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
179
+ | 12 | blk.1.ffn_down.weight | Block 1 Feed-Forward Network "Down" (W) | (~16K) 16384 | 256 x 64 x 1 x 1 | F16 |
180
+ | 13 | blk.1.ffn_gate.weight | Block 1 Feed-Forward Network "Gate" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
181
+ | 14 | blk.1.ffn_up.weight | Block 1 Feed-Forward Network "Up" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
182
+ | 15 | blk.1.ffn_norm.weight | Block 1 Feed-Forward Network Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
183
+ | 16 | blk.1.attn_k.weight | Block 1 Attention Key (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
184
+ | 17 | blk.1.attn_output.weight | Block 1 Attention Output (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
185
+ | 18 | blk.1.attn_q.weight | Block 1 Attention Query (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
186
+ | 19 | blk.1.attn_v.weight | Block 1 Attention Value (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
187
+
188
+ - Total elements in blk.1: (~66K) 65664
189
+ - Percentage of total elements: 1.42%
190
+
191
+
192
+ ### <a name="blk_2">Block 2 Tensor Group : ~66K Elements</a>
193
+
194
+ | T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type |
195
+ |-----:|:-------------------------|:-----------------------------------------------|:-------------|:------------------|:-----|
196
+ | 20 | blk.2.attn_norm.weight | Block 2 Attention Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
197
+ | 21 | blk.2.ffn_down.weight | Block 2 Feed-Forward Network "Down" (W) | (~16K) 16384 | 256 x 64 x 1 x 1 | F16 |
198
+ | 22 | blk.2.ffn_gate.weight | Block 2 Feed-Forward Network "Gate" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
199
+ | 23 | blk.2.ffn_up.weight | Block 2 Feed-Forward Network "Up" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
200
+ | 24 | blk.2.ffn_norm.weight | Block 2 Feed-Forward Network Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
201
+ | 25 | blk.2.attn_k.weight | Block 2 Attention Key (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
202
+ | 26 | blk.2.attn_output.weight | Block 2 Attention Output (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
203
+ | 27 | blk.2.attn_q.weight | Block 2 Attention Query (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
204
+ | 28 | blk.2.attn_v.weight | Block 2 Attention Value (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
205
+
206
+ - Total elements in blk.2: (~66K) 65664
207
+ - Percentage of total elements: 1.42%
208
+
209
+
210
+ ### <a name="blk_3">Block 3 Tensor Group : ~66K Elements</a>
211
+
212
+ | T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type |
213
+ |-----:|:-------------------------|:-----------------------------------------------|:-------------|:------------------|:-----|
214
+ | 29 | blk.3.attn_norm.weight | Block 3 Attention Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
215
+ | 30 | blk.3.ffn_down.weight | Block 3 Feed-Forward Network "Down" (W) | (~16K) 16384 | 256 x 64 x 1 x 1 | F16 |
216
+ | 31 | blk.3.ffn_gate.weight | Block 3 Feed-Forward Network "Gate" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
217
+ | 32 | blk.3.ffn_up.weight | Block 3 Feed-Forward Network "Up" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
218
+ | 33 | blk.3.ffn_norm.weight | Block 3 Feed-Forward Network Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
219
+ | 34 | blk.3.attn_k.weight | Block 3 Attention Key (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
220
+ | 35 | blk.3.attn_output.weight | Block 3 Attention Output (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
221
+ | 36 | blk.3.attn_q.weight | Block 3 Attention Query (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
222
+ | 37 | blk.3.attn_v.weight | Block 3 Attention Value (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
223
+
224
+ - Total elements in blk.3: (~66K) 65664
225
+ - Percentage of total elements: 1.42%
226
+
227
+
228
+ ### <a name="blk_4">Block 4 Tensor Group : ~66K Elements</a>
229
+
230
+ | T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type |
231
+ |-----:|:-------------------------|:-----------------------------------------------|:-------------|:------------------|:-----|
232
+ | 38 | blk.4.attn_norm.weight | Block 4 Attention Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
233
+ | 39 | blk.4.ffn_down.weight | Block 4 Feed-Forward Network "Down" (W) | (~16K) 16384 | 256 x 64 x 1 x 1 | F16 |
234
+ | 40 | blk.4.ffn_gate.weight | Block 4 Feed-Forward Network "Gate" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
235
+ | 41 | blk.4.ffn_up.weight | Block 4 Feed-Forward Network "Up" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
236
+ | 42 | blk.4.ffn_norm.weight | Block 4 Feed-Forward Network Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
237
+ | 43 | blk.4.attn_k.weight | Block 4 Attention Key (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
238
+ | 44 | blk.4.attn_output.weight | Block 4 Attention Output (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
239
+ | 45 | blk.4.attn_q.weight | Block 4 Attention Query (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
240
+ | 46 | blk.4.attn_v.weight | Block 4 Attention Value (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
241
+
242
+ - Total elements in blk.4: (~66K) 65664
243
+ - Percentage of total elements: 1.42%
244
+
245
+
246
+ ### <a name="blk_5">Block 5 Tensor Group : ~66K Elements</a>
247
+
248
+ | T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type |
249
+ |-----:|:-------------------------|:-----------------------------------------------|:-------------|:------------------|:-----|
250
+ | 47 | blk.5.attn_norm.weight | Block 5 Attention Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
251
+ | 48 | blk.5.ffn_down.weight | Block 5 Feed-Forward Network "Down" (W) | (~16K) 16384 | 256 x 64 x 1 x 1 | F16 |
252
+ | 49 | blk.5.ffn_gate.weight | Block 5 Feed-Forward Network "Gate" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
253
+ | 50 | blk.5.ffn_up.weight | Block 5 Feed-Forward Network "Up" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
254
+ | 51 | blk.5.ffn_norm.weight | Block 5 Feed-Forward Network Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
255
+ | 52 | blk.5.attn_k.weight | Block 5 Attention Key (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
256
+ | 53 | blk.5.attn_output.weight | Block 5 Attention Output (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
257
+ | 54 | blk.5.attn_q.weight | Block 5 Attention Query (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
258
+ | 55 | blk.5.attn_v.weight | Block 5 Attention Value (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
259
+
260
+ - Total elements in blk.5: (~66K) 65664
261
+ - Percentage of total elements: 1.42%
262
+
263
+
264
+ ### <a name="blk_6">Block 6 Tensor Group : ~66K Elements</a>
265
+
266
+ | T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type |
267
+ |-----:|:-------------------------|:-----------------------------------------------|:-------------|:------------------|:-----|
268
+ | 56 | blk.6.attn_norm.weight | Block 6 Attention Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
269
+ | 57 | blk.6.ffn_down.weight | Block 6 Feed-Forward Network "Down" (W) | (~16K) 16384 | 256 x 64 x 1 x 1 | F16 |
270
+ | 58 | blk.6.ffn_gate.weight | Block 6 Feed-Forward Network "Gate" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
271
+ | 59 | blk.6.ffn_up.weight | Block 6 Feed-Forward Network "Up" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
272
+ | 60 | blk.6.ffn_norm.weight | Block 6 Feed-Forward Network Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
273
+ | 61 | blk.6.attn_k.weight | Block 6 Attention Key (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
274
+ | 62 | blk.6.attn_output.weight | Block 6 Attention Output (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
275
+ | 63 | blk.6.attn_q.weight | Block 6 Attention Query (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
276
+ | 64 | blk.6.attn_v.weight | Block 6 Attention Value (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
277
+
278
+ - Total elements in blk.6: (~66K) 65664
279
+ - Percentage of total elements: 1.42%
280
+
281
+
282
+ ### <a name="blk_7">Block 7 Tensor Group : ~66K Elements</a>
283
+
284
+ | T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type |
285
+ |-----:|:-------------------------|:-----------------------------------------------|:-------------|:------------------|:-----|
286
+ | 65 | blk.7.attn_norm.weight | Block 7 Attention Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
287
+ | 66 | blk.7.ffn_down.weight | Block 7 Feed-Forward Network "Down" (W) | (~16K) 16384 | 256 x 64 x 1 x 1 | F16 |
288
+ | 67 | blk.7.ffn_gate.weight | Block 7 Feed-Forward Network "Gate" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
289
+ | 68 | blk.7.ffn_up.weight | Block 7 Feed-Forward Network "Up" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
290
+ | 69 | blk.7.ffn_norm.weight | Block 7 Feed-Forward Network Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
291
+ | 70 | blk.7.attn_k.weight | Block 7 Attention Key (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
292
+ | 71 | blk.7.attn_output.weight | Block 7 Attention Output (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
293
+ | 72 | blk.7.attn_q.weight | Block 7 Attention Query (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
294
+ | 73 | blk.7.attn_v.weight | Block 7 Attention Value (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
295
+
296
+ - Total elements in blk.7: (~66K) 65664
297
+ - Percentage of total elements: 1.42%
298
+
299
+
300
+
llama.cpp CHANGED
@@ -1 +1 @@
- Subproject commit 00ff73a90101c76108131a5867a3c3c78a42ee8c
+ Subproject commit 0d2c7321e9678e91b760ebe57f0d063856bb018b
llamafile CHANGED
@@ -1 +1 @@
- Subproject commit 9cd8d70942a049ba3c3bddd12e87e1fb599fbd49
+ Subproject commit b3930aa6472f3337d89d0b4399ee6e7afd9b17ad
llamafile-creation-legacy.sh DELETED
@@ -1,52 +0,0 @@
- #!/bin/bash
-
- MODEL_DIR="maykeye_tinyllama"
- METADATA_FILE="maykeye_tinyllama-metadata.json"
-
- ###############################################################################
- # Pull both model folder, llamafile (for the engine) and llama.cpp (for the conversion script)
- echo == Prep Enviroment ==
- git submodule update --init
-
- ###############################################################################
- echo == Build and prep the llamafile engine execuable ==
- pushd llamafile
- make -j8
- make
- # This is where each executables is located for reference purpose for now as of 2024-04-05
- # and was determined by running `sudo make install PREFIX=/usr/local`
- # ./o/llamafile/zipalign --> /usr/local/bin/zipalign
- # ./o/llama.cpp/main/main --> /usr/local/bin/llamafile
- # ./o/llama.cpp/imatrix/imatrix --> /usr/local/bin/llamafile-imatrix
- # ./o/llama.cpp/quantize/quantize --> /usr/local/bin/llamafile-quantize
- # ./build/llamafile-convert --> /usr/local/bin/llamafile-convert
- # ./o/llama.cpp/perplexity/perplexity --> /usr/local/bin/llamafile-perplexity
- # ./o/llama.cpp/llava/llava-quantize --> /usr/local/bin/llava-quantize
- popd
-
- ###############################################################################
- echo == What is our llamafile name going to be? ==
- OUTFILE=$(./llama.cpp/examples/convert-legacy-llama.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --get-outfile)
- echo We will be aiming to generate $OUTFILE.llamafile
-
- ###############################################################################
- echo == Convert from safetensor to gguf ==
- ./llama.cpp/examples/convert-legacy-llama.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16
- mv ${MODEL_DIR}/${OUTFILE}.gguf ${OUTFILE}.gguf
-
- ###############################################################################
- echo == Generating Llamafile ==
- cp ./llamafile/o/llama.cpp/main/main ${OUTFILE}.llamafile
-
- # Create an .args file with settings defaults
- cat >.args <<EOF
- -m
- ${OUTFILE}.gguf
- EOF
-
- # zip align engine, gguf and default args
- ./llamafile/o/llamafile/zipalign -j0 ${OUTFILE}.llamafile ${OUTFILE}.gguf .args
-
- ###############################################################################
- echo == Test Output ==
- ./${OUTFILE}.llamafile --cli -p "hello world the gruff man said"
llamafile-creation.sh CHANGED
@@ -26,12 +26,15 @@ popd
 
  ###############################################################################
  echo == What is our llamafile name going to be? ==
- OUTFILE=$(./llama.cpp/convert-hf-to-gguf.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --get-outfile)
+ ./llama.cpp/convert_hf_to_gguf.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --dry-run 2>/dev/null
+ OUTFILE_PATH=$(./llama.cpp/convert_hf_to_gguf.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --dry-run 2>/dev/null)
+ OUTFILE_FILE=$(basename ${OUTFILE_PATH})
+ OUTFILE="${OUTFILE_FILE%.gguf}"
  echo We will be aiming to generate $OUTFILE.llamafile
 
  ###############################################################################
  echo == Convert from safetensor to gguf ==
- ./llama.cpp/convert-hf-to-gguf.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --verbose
+ ./llama.cpp/convert_hf_to_gguf.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --verbose
  mv ${MODEL_DIR}/${OUTFILE}.gguf ${OUTFILE}.gguf
 
  ###############################################################################
@@ -48,5 +51,9 @@ EOF
  ./llamafile/o/llamafile/zipalign -j0 ${OUTFILE}.llamafile ${OUTFILE}.gguf .args
 
  ###############################################################################
- echo == Test Output ==
+ echo == Test Output ./${OUTFILE}.llamafile ==
  ./${OUTFILE}.llamafile --cli -p "hello world the gruff man said"
+
+ ###############################################################################
+ echo == Useful GGUF Technical Dump ==
+ ./llama.cpp/gguf-py/scripts/gguf_dump.py --markdown ${OUTFILE}.gguf > ${OUTFILE}.md
maykeye_tinyllama-metadata.json CHANGED
@@ -1,10 +1,16 @@
  {
  "general.name": "TinyLLama",
- "general.version": "v0.2",
- "general.author": "mofosyne",
+ "general.version": "v0.0",
+ "general.author": "Maykeye",
+ "general.quantized_by": "Mofosyne",
  "general.url": "https://huggingface.co/mofosyne/TinyLLama-v0-llamafile",
  "general.description": "This gguf is ported from a first version of Maykeye attempt at recreating roneneldan/TinyStories-1M but using Llama architecture",
  "general.license": "apache-2.0",
+ "general.license_name": "Apache License Version 2.0, January 2004",
+ "general.license_link": "https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md",
  "general.source.url": "https://huggingface.co/Maykeye/TinyLLama-v0",
- "general.source.huggingface.repository": "Maykeye/TinyLLama-v0"
+ "general.source.source_repo_url": "https://huggingface.co/Maykeye/TinyLLama-v0",
+ "general.tags": ["text generation", "transformer", "llama", "tiny", "tiny model"],
+ "general.languages": ["en"],
+ "general.datasets": ["https://huggingface.co/datasets/roneneldan/TinyStories/blob/main/TinyStoriesV2-GPT4-train.txt", "https://huggingface.co/datasets/roneneldan/TinyStories/blob/main/TinyStoriesV2-GPT4-valid.txt"]
  }
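
These override keys are the source of the `general.*` entries seen in the GGUF dump above (`general.author`, `general.quantized_by`, the tags, languages and datasets arrays, and so on); note that the converter derives some fields rather than copying them verbatim, e.g. `general.name` ends up as 'Maykeye_Tinyllama'. A small sketch, under the same gguf-py assumption as earlier, to see which override keys appear as-is in the converted file:

```python
# Sketch: compare the metadata override JSON against the key-value store
# of the converted GGUF to see which keys were carried over verbatim.
import json
from gguf import GGUFReader

with open("maykeye_tinyllama-metadata.json") as f:
    overrides = json.load(f)

reader = GGUFReader("Maykeye_Tinyllama-4.6M-v0.0-F16.gguf")

for key in overrides:
    status = "present" if key in reader.fields else "absent/remapped"
    print(f"{status:15} {key}")
```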