update to use the new metadata override feature already merged in
- .args +1 -1
- Tinyllama-5M-v0.2-F16.gguf → Maykeye_Tinyllama-4.6M-v0.0-F16.gguf +2 -2
- Tinyllama-5M-v0.2-F16.llamafile → Maykeye_Tinyllama-4.6M-v0.0-F16.llamafile +2 -2
- Maykeye_Tinyllama-4.6M-v0.0-F16.md +300 -0
- llama.cpp +1 -1
- llamafile +1 -1
- llamafile-creation-legacy.sh +0 -52
- llamafile-creation.sh +10 -3
- maykeye_tinyllama-metadata.json +9 -3
.args CHANGED

```diff
@@ -1,2 +1,2 @@
 -m
-Tinyllama-5M-v0.2-F16.gguf
+Maykeye_Tinyllama-4.6M-v0.0-F16.gguf
```
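The `.args` member holds the default command-line arguments that get zip-aligned into the llamafile, so the embedded GGUF name has to track the rename below. A minimal sketch of how this file is produced and embedded, following the same steps as llamafile-creation.sh (file names filled in for this model):

```bash
# Create the .args file with settings defaults (one argument per line) ...
cat >.args <<EOF
-m
Maykeye_Tinyllama-4.6M-v0.0-F16.gguf
EOF

# ... then zip-align the engine, the GGUF, and the defaults into one executable
./llamafile/o/llamafile/zipalign -j0 Maykeye_Tinyllama-4.6M-v0.0-F16.llamafile \
    Maykeye_Tinyllama-4.6M-v0.0-F16.gguf .args
```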
Tinyllama-5M-v0.2-F16.gguf → Maykeye_Tinyllama-4.6M-v0.0-F16.gguf RENAMED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:994dbfb58bbb955f4b8582007785e76572421bb9c5d095c8005cc09f739d8a70
+size 10008672
```
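A Git LFS pointer's oid is simply the SHA-256 of the tracked file, so a locally rebuilt GGUF can be checked against the pointer above (a sanity check, not part of the commit):

```bash
# Should print the oid from the LFS pointer (the file is 10008672 bytes)
sha256sum Maykeye_Tinyllama-4.6M-v0.0-F16.gguf
# 994dbfb58bbb955f4b8582007785e76572421bb9c5d095c8005cc09f739d8a70
```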
Tinyllama-5M-v0.2-F16.llamafile → Maykeye_Tinyllama-4.6M-v0.0-F16.llamafile RENAMED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b97e3d6892bb4f08c058c1e37224bf254f7914c860320c646f69d06988545a0c
+size 21509971
```
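The .llamafile is the zipalign bundle of the engine binary, the GGUF, and .args, so its size should be roughly engine plus model; a back-of-the-envelope check (my arithmetic, assuming nothing else is in the archive):

```bash
echo $(( 21509971 - 10008672 ))   # 11501299 bytes (~11.5 MB) of engine + .args around the ~10 MB GGUF
```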
Maykeye_Tinyllama-4.6M-v0.0-F16.md ADDED

@@ -0,0 +1,300 @@
# Maykeye_Tinyllama-4.6M-v0.0-F16.gguf - GGUF Internal File Dump

- Endian: LITTLE endian

## Key Value Metadata Store

There are 36 key-value pairs in this file

| POS | TYPE      | Count | Key                                    | Value |
|----:|:----------|------:|:---------------------------------------|:------|
|   1 | UINT32    |     1 | GGUF.version                           | 3 |
|   2 | UINT64    |     1 | GGUF.tensor_count                      | 75 |
|   3 | UINT64    |     1 | GGUF.kv_count                          | 33 |
|   4 | STRING    |     1 | general.architecture                   | 'llama' |
|   5 | STRING    |     1 | general.type                           | 'model' |
|   6 | STRING    |     1 | general.name                           | 'Maykeye_Tinyllama' |
|   7 | STRING    |     1 | general.author                         | 'Maykeye' |
|   8 | STRING    |     1 | general.version                        | 'v0.0' |
|   9 | STRING    |     1 | general.description                    | 'This gguf is ported from a first version of Maykeye attempt ' |
|  10 | STRING    |     1 | general.quantized_by                   | 'Mofosyne' |
|  11 | STRING    |     1 | general.size_label                     | '4.6M' |
|  12 | STRING    |     1 | general.license                        | 'apache-2.0' |
|  13 | STRING    |     1 | general.url                            | 'https://huggingface.co/mofosyne/TinyLLama-v0-llamafile' |
|  14 | STRING    |     1 | general.source.url                     | 'https://huggingface.co/Maykeye/TinyLLama-v0' |
|  15 | [STRING]  |     5 | general.tags                           | [ 'tiny ', '\n\x00\x00\x00\x00', 'tiny', '\x04\x00\x00\x00\x00', 'llama', ... ] |
|  16 | [STRING]  |     1 | general.languages                      | [ 'en' ] |
|  17 | [STRING]  |     2 | general.datasets                       | [ 'https', ']\x00\x00\x00\x00', ... ] |
|  18 | UINT32    |     1 | llama.block_count                      | 8 |
|  19 | UINT32    |     1 | llama.context_length                   | 2048 |
|  20 | UINT32    |     1 | llama.embedding_length                 | 64 |
|  21 | UINT32    |     1 | llama.feed_forward_length              | 256 |
|  22 | UINT32    |     1 | llama.attention.head_count             | 16 |
|  23 | FLOAT32   |     1 | llama.attention.layer_norm_rms_epsilon | 1e-06 |
|  24 | UINT32    |     1 | general.file_type                      | 1 |
|  25 | UINT32    |     1 | llama.vocab_size                       | 32000 |
|  26 | UINT32    |     1 | llama.rope.dimension_count             | 4 |
|  27 | STRING    |     1 | tokenizer.ggml.model                   | 'llama' |
|  28 | STRING    |     1 | tokenizer.ggml.pre                     | 'default' |
|  29 | [STRING]  | 32000 | tokenizer.ggml.tokens                  | [ 'А', '\x02\x00\x00\x00\x00', 'š', '\x02\x00\x00\x00\x00', 'α', ... ] |
|  30 | [FLOAT32] | 32000 | tokenizer.ggml.scores                  | [ -31740.0, -31739.0, -31738.0, -31737.0, -31736.0, -31735.0, -31734.0, ... ] |
|  31 | [INT32]   | 32000 | tokenizer.ggml.token_type              | [ 1, 1, 1, 1, 1, 1, 1, ... ] |
|  32 | UINT32    |     1 | tokenizer.ggml.bos_token_id            | 1 |
|  33 | UINT32    |     1 | tokenizer.ggml.eos_token_id            | 2 |
|  34 | UINT32    |     1 | tokenizer.ggml.unknown_token_id        | 0 |
|  35 | UINT32    |     1 | tokenizer.ggml.padding_token_id        | 0 |
|  36 | UINT32    |     1 | general.quantization_version           | 2 |
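Note that the first three rows (GGUF.version, GGUF.tensor_count, GGUF.kv_count) describe the file header rather than stored keys, which is presumably why the table has 36 rows while GGUF.kv_count reads 33. The whole dump is generated by the gguf-py script that llamafile-creation.sh now runs as its last step:

```bash
# Regenerate this markdown dump from the GGUF
./llama.cpp/gguf-py/scripts/gguf_dump.py --markdown Maykeye_Tinyllama-4.6M-v0.0-F16.gguf > Maykeye_Tinyllama-4.6M-v0.0-F16.md
```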
## Tensors Overview ~5M Elements

Total number of elements in all tensors: 4621376 Elements

- [Base Tensor Group - ~4M Elements](#base)
- [Block 0 Tensor Group - ~66K Elements](#blk_0)
- [Block 1 Tensor Group - ~66K Elements](#blk_1)
- [Block 2 Tensor Group - ~66K Elements](#blk_2)
- [Block 3 Tensor Group - ~66K Elements](#blk_3)
- [Block 4 Tensor Group - ~66K Elements](#blk_4)
- [Block 5 Tensor Group - ~66K Elements](#blk_5)
- [Block 6 Tensor Group - ~66K Elements](#blk_6)
- [Block 7 Tensor Group - ~66K Elements](#blk_7)

### Tensor Data Offset

This table lists each tensor's data offset and data size relative to the start of the file

| T_ID | Tensor Layer Name        | Data Offset (B) | Data Size (B) |
|-----:|:-------------------------|----------------:|--------------:|
|    0 | output.weight            | 0xba760         | 0x3e8000      |
|    1 | token_embd.weight        | 0x4a2760        | 0x3e8000      |
|    2 | blk.0.attn_norm.weight   | 0x88a760        | 0x100         |
|    3 | blk.0.ffn_down.weight    | 0x88a860        | 0x8000        |
|    4 | blk.0.ffn_gate.weight    | 0x892860        | 0x8000        |
|    5 | blk.0.ffn_up.weight      | 0x89a860        | 0x8000        |
|    6 | blk.0.ffn_norm.weight    | 0x8a2860        | 0x100         |
|    7 | blk.0.attn_k.weight      | 0x8a2960        | 0x2000        |
|    8 | blk.0.attn_output.weight | 0x8a4960        | 0x2000        |
|    9 | blk.0.attn_q.weight      | 0x8a6960        | 0x2000        |
|   10 | blk.0.attn_v.weight      | 0x8a8960        | 0x2000        |
|   11 | blk.1.attn_norm.weight   | 0x8aa960        | 0x100         |
|   12 | blk.1.ffn_down.weight    | 0x8aaa60        | 0x8000        |
|   13 | blk.1.ffn_gate.weight    | 0x8b2a60        | 0x8000        |
|   14 | blk.1.ffn_up.weight      | 0x8baa60        | 0x8000        |
|   15 | blk.1.ffn_norm.weight    | 0x8c2a60        | 0x100         |
|   16 | blk.1.attn_k.weight      | 0x8c2b60        | 0x2000        |
|   17 | blk.1.attn_output.weight | 0x8c4b60        | 0x2000        |
|   18 | blk.1.attn_q.weight      | 0x8c6b60        | 0x2000        |
|   19 | blk.1.attn_v.weight      | 0x8c8b60        | 0x2000        |
|   20 | blk.2.attn_norm.weight   | 0x8cab60        | 0x100         |
|   21 | blk.2.ffn_down.weight    | 0x8cac60        | 0x8000        |
|   22 | blk.2.ffn_gate.weight    | 0x8d2c60        | 0x8000        |
|   23 | blk.2.ffn_up.weight      | 0x8dac60        | 0x8000        |
|   24 | blk.2.ffn_norm.weight    | 0x8e2c60        | 0x100         |
|   25 | blk.2.attn_k.weight      | 0x8e2d60        | 0x2000        |
|   26 | blk.2.attn_output.weight | 0x8e4d60        | 0x2000        |
|   27 | blk.2.attn_q.weight      | 0x8e6d60        | 0x2000        |
|   28 | blk.2.attn_v.weight      | 0x8e8d60        | 0x2000        |
|   29 | blk.3.attn_norm.weight   | 0x8ead60        | 0x100         |
|   30 | blk.3.ffn_down.weight    | 0x8eae60        | 0x8000        |
|   31 | blk.3.ffn_gate.weight    | 0x8f2e60        | 0x8000        |
|   32 | blk.3.ffn_up.weight      | 0x8fae60        | 0x8000        |
|   33 | blk.3.ffn_norm.weight    | 0x902e60        | 0x100         |
|   34 | blk.3.attn_k.weight      | 0x902f60        | 0x2000        |
|   35 | blk.3.attn_output.weight | 0x904f60        | 0x2000        |
|   36 | blk.3.attn_q.weight      | 0x906f60        | 0x2000        |
|   37 | blk.3.attn_v.weight      | 0x908f60        | 0x2000        |
|   38 | blk.4.attn_norm.weight   | 0x90af60        | 0x100         |
|   39 | blk.4.ffn_down.weight    | 0x90b060        | 0x8000        |
|   40 | blk.4.ffn_gate.weight    | 0x913060        | 0x8000        |
|   41 | blk.4.ffn_up.weight      | 0x91b060        | 0x8000        |
|   42 | blk.4.ffn_norm.weight    | 0x923060        | 0x100         |
|   43 | blk.4.attn_k.weight      | 0x923160        | 0x2000        |
|   44 | blk.4.attn_output.weight | 0x925160        | 0x2000        |
|   45 | blk.4.attn_q.weight      | 0x927160        | 0x2000        |
|   46 | blk.4.attn_v.weight      | 0x929160        | 0x2000        |
|   47 | blk.5.attn_norm.weight   | 0x92b160        | 0x100         |
|   48 | blk.5.ffn_down.weight    | 0x92b260        | 0x8000        |
|   49 | blk.5.ffn_gate.weight    | 0x933260        | 0x8000        |
|   50 | blk.5.ffn_up.weight      | 0x93b260        | 0x8000        |
|   51 | blk.5.ffn_norm.weight    | 0x943260        | 0x100         |
|   52 | blk.5.attn_k.weight      | 0x943360        | 0x2000        |
|   53 | blk.5.attn_output.weight | 0x945360        | 0x2000        |
|   54 | blk.5.attn_q.weight      | 0x947360        | 0x2000        |
|   55 | blk.5.attn_v.weight      | 0x949360        | 0x2000        |
|   56 | blk.6.attn_norm.weight   | 0x94b360        | 0x100         |
|   57 | blk.6.ffn_down.weight    | 0x94b460        | 0x8000        |
|   58 | blk.6.ffn_gate.weight    | 0x953460        | 0x8000        |
|   59 | blk.6.ffn_up.weight      | 0x95b460        | 0x8000        |
|   60 | blk.6.ffn_norm.weight    | 0x963460        | 0x100         |
|   61 | blk.6.attn_k.weight      | 0x963560        | 0x2000        |
|   62 | blk.6.attn_output.weight | 0x965560        | 0x2000        |
|   63 | blk.6.attn_q.weight      | 0x967560        | 0x2000        |
|   64 | blk.6.attn_v.weight      | 0x969560        | 0x2000        |
|   65 | blk.7.attn_norm.weight   | 0x96b560        | 0x100         |
|   66 | blk.7.ffn_down.weight    | 0x96b660        | 0x8000        |
|   67 | blk.7.ffn_gate.weight    | 0x973660        | 0x8000        |
|   68 | blk.7.ffn_up.weight      | 0x97b660        | 0x8000        |
|   69 | blk.7.ffn_norm.weight    | 0x983660        | 0x100         |
|   70 | blk.7.attn_k.weight      | 0x983760        | 0x2000        |
|   71 | blk.7.attn_output.weight | 0x985760        | 0x2000        |
|   72 | blk.7.attn_q.weight      | 0x987760        | 0x2000        |
|   73 | blk.7.attn_v.weight      | 0x989760        | 0x2000        |
|   74 | output_norm.weight       | 0x98b760        | 0x100         |
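A quick consistency check on the table (my arithmetic, not part of the dump): each tensor's offset plus its size lands exactly on the next tensor's offset, and the last tensor ends at the file size recorded in the LFS pointer:

```bash
printf '0x%x\n' $(( 0xba760 + 0x3e8000 ))   # 0x4a2760 -> start of token_embd.weight
printf '%d\n'   $(( 0x98b760 + 0x100 ))     # 10008672 -> exactly the GGUF file size
```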
### <a name="base">Base Tensor Group : ~4M Elements</a>

| T_ID | Tensor Layer Name  | Human Friendly Tensor Layer Name | Elements      | Shape              | Type |
|-----:|:-------------------|:---------------------------------|:--------------|:-------------------|:-----|
|    0 | output.weight      | Output (W)                       | (~2M) 2048000 | 64 x 32000 x 1 x 1 | F16  |
|    1 | token_embd.weight  | Token Embedding (W)              | (~2M) 2048000 | 64 x 32000 x 1 x 1 | F16  |
|   74 | output_norm.weight | Output Normalization (W)         | ( 64) 64      | 64 x 1 x 1 x 1     | F32  |

- Total elements in base: ( ~4M) 4096064
- Percentage of total elements: 88.63%
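The group total checks out (again my arithmetic): two 64 x 32000 matrices plus the 64-element norm vector:

```bash
echo $(( 2 * (64 * 32000) + 64 ))   # 4096064, i.e. 88.63% of 4621376
```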
### <a name="blk_0">Block 0 Tensor Group : ~66K Elements</a>
|
157 |
+
|
158 |
+
| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type |
|
159 |
+
|-----:|:-------------------------|:-----------------------------------------------|:-------------|:------------------|:-----|
|
160 |
+
| 2 | blk.0.attn_norm.weight | Block 0 Attention Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
|
161 |
+
| 3 | blk.0.ffn_down.weight | Block 0 Feed-Forward Network "Down" (W) | (~16K) 16384 | 256 x 64 x 1 x 1 | F16 |
|
162 |
+
| 4 | blk.0.ffn_gate.weight | Block 0 Feed-Forward Network "Gate" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
|
163 |
+
| 5 | blk.0.ffn_up.weight | Block 0 Feed-Forward Network "Up" (W) | (~16K) 16384 | 64 x 256 x 1 x 1 | F16 |
|
164 |
+
| 6 | blk.0.ffn_norm.weight | Block 0 Feed-Forward Network Normalization (W) | ( 64) 64 | 64 x 1 x 1 x 1 | F32 |
|
165 |
+
| 7 | blk.0.attn_k.weight | Block 0 Attention Key (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
|
166 |
+
| 8 | blk.0.attn_output.weight | Block 0 Attention Output (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
|
167 |
+
| 9 | blk.0.attn_q.weight | Block 0 Attention Query (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
|
168 |
+
| 10 | blk.0.attn_v.weight | Block 0 Attention Value (W) | ( ~4K) 4096 | 64 x 64 x 1 x 1 | F16 |
|
169 |
+
|
170 |
+
- Total elements in blk.0: (~66K) 65664
|
171 |
+
- Percentage of total elements: 1.42%
|
172 |
+
|
173 |
+
|
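Every block has the same shape budget: two 64-element norms, three 64 x 256 FFN matrices, and four 64 x 64 attention matrices; eight such blocks plus the base group account for the file total (my arithmetic):

```bash
echo $(( 2*64 + 3*16384 + 4*4096 ))   # 65664 elements per block
echo $(( 4096064 + 8*65664 ))         # 4621376, matching the overall total
```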
### <a name="blk_1">Block 1 Tensor Group : ~66K Elements</a>

| T_ID | Tensor Layer Name        | Human Friendly Tensor Layer Name               | Elements     | Shape            | Type |
|-----:|:-------------------------|:-----------------------------------------------|:-------------|:-----------------|:-----|
|   11 | blk.1.attn_norm.weight   | Block 1 Attention Normalization (W)            | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   12 | blk.1.ffn_down.weight    | Block 1 Feed-Forward Network "Down" (W)        | (~16K) 16384 | 256 x 64 x 1 x 1 | F16  |
|   13 | blk.1.ffn_gate.weight    | Block 1 Feed-Forward Network "Gate" (W)        | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   14 | blk.1.ffn_up.weight      | Block 1 Feed-Forward Network "Up" (W)          | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   15 | blk.1.ffn_norm.weight    | Block 1 Feed-Forward Network Normalization (W) | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   16 | blk.1.attn_k.weight      | Block 1 Attention Key (W)                      | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   17 | blk.1.attn_output.weight | Block 1 Attention Output (W)                   | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   18 | blk.1.attn_q.weight      | Block 1 Attention Query (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   19 | blk.1.attn_v.weight      | Block 1 Attention Value (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |

- Total elements in blk.1: (~66K) 65664
- Percentage of total elements: 1.42%

### <a name="blk_2">Block 2 Tensor Group : ~66K Elements</a>

| T_ID | Tensor Layer Name        | Human Friendly Tensor Layer Name               | Elements     | Shape            | Type |
|-----:|:-------------------------|:-----------------------------------------------|:-------------|:-----------------|:-----|
|   20 | blk.2.attn_norm.weight   | Block 2 Attention Normalization (W)            | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   21 | blk.2.ffn_down.weight    | Block 2 Feed-Forward Network "Down" (W)        | (~16K) 16384 | 256 x 64 x 1 x 1 | F16  |
|   22 | blk.2.ffn_gate.weight    | Block 2 Feed-Forward Network "Gate" (W)        | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   23 | blk.2.ffn_up.weight      | Block 2 Feed-Forward Network "Up" (W)          | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   24 | blk.2.ffn_norm.weight    | Block 2 Feed-Forward Network Normalization (W) | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   25 | blk.2.attn_k.weight      | Block 2 Attention Key (W)                      | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   26 | blk.2.attn_output.weight | Block 2 Attention Output (W)                   | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   27 | blk.2.attn_q.weight      | Block 2 Attention Query (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   28 | blk.2.attn_v.weight      | Block 2 Attention Value (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |

- Total elements in blk.2: (~66K) 65664
- Percentage of total elements: 1.42%

### <a name="blk_3">Block 3 Tensor Group : ~66K Elements</a>

| T_ID | Tensor Layer Name        | Human Friendly Tensor Layer Name               | Elements     | Shape            | Type |
|-----:|:-------------------------|:-----------------------------------------------|:-------------|:-----------------|:-----|
|   29 | blk.3.attn_norm.weight   | Block 3 Attention Normalization (W)            | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   30 | blk.3.ffn_down.weight    | Block 3 Feed-Forward Network "Down" (W)        | (~16K) 16384 | 256 x 64 x 1 x 1 | F16  |
|   31 | blk.3.ffn_gate.weight    | Block 3 Feed-Forward Network "Gate" (W)        | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   32 | blk.3.ffn_up.weight      | Block 3 Feed-Forward Network "Up" (W)          | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   33 | blk.3.ffn_norm.weight    | Block 3 Feed-Forward Network Normalization (W) | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   34 | blk.3.attn_k.weight      | Block 3 Attention Key (W)                      | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   35 | blk.3.attn_output.weight | Block 3 Attention Output (W)                   | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   36 | blk.3.attn_q.weight      | Block 3 Attention Query (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   37 | blk.3.attn_v.weight      | Block 3 Attention Value (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |

- Total elements in blk.3: (~66K) 65664
- Percentage of total elements: 1.42%

### <a name="blk_4">Block 4 Tensor Group : ~66K Elements</a>

| T_ID | Tensor Layer Name        | Human Friendly Tensor Layer Name               | Elements     | Shape            | Type |
|-----:|:-------------------------|:-----------------------------------------------|:-------------|:-----------------|:-----|
|   38 | blk.4.attn_norm.weight   | Block 4 Attention Normalization (W)            | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   39 | blk.4.ffn_down.weight    | Block 4 Feed-Forward Network "Down" (W)        | (~16K) 16384 | 256 x 64 x 1 x 1 | F16  |
|   40 | blk.4.ffn_gate.weight    | Block 4 Feed-Forward Network "Gate" (W)        | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   41 | blk.4.ffn_up.weight      | Block 4 Feed-Forward Network "Up" (W)          | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   42 | blk.4.ffn_norm.weight    | Block 4 Feed-Forward Network Normalization (W) | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   43 | blk.4.attn_k.weight      | Block 4 Attention Key (W)                      | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   44 | blk.4.attn_output.weight | Block 4 Attention Output (W)                   | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   45 | blk.4.attn_q.weight      | Block 4 Attention Query (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   46 | blk.4.attn_v.weight      | Block 4 Attention Value (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |

- Total elements in blk.4: (~66K) 65664
- Percentage of total elements: 1.42%

### <a name="blk_5">Block 5 Tensor Group : ~66K Elements</a>

| T_ID | Tensor Layer Name        | Human Friendly Tensor Layer Name               | Elements     | Shape            | Type |
|-----:|:-------------------------|:-----------------------------------------------|:-------------|:-----------------|:-----|
|   47 | blk.5.attn_norm.weight   | Block 5 Attention Normalization (W)            | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   48 | blk.5.ffn_down.weight    | Block 5 Feed-Forward Network "Down" (W)        | (~16K) 16384 | 256 x 64 x 1 x 1 | F16  |
|   49 | blk.5.ffn_gate.weight    | Block 5 Feed-Forward Network "Gate" (W)        | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   50 | blk.5.ffn_up.weight      | Block 5 Feed-Forward Network "Up" (W)          | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   51 | blk.5.ffn_norm.weight    | Block 5 Feed-Forward Network Normalization (W) | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   52 | blk.5.attn_k.weight      | Block 5 Attention Key (W)                      | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   53 | blk.5.attn_output.weight | Block 5 Attention Output (W)                   | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   54 | blk.5.attn_q.weight      | Block 5 Attention Query (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   55 | blk.5.attn_v.weight      | Block 5 Attention Value (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |

- Total elements in blk.5: (~66K) 65664
- Percentage of total elements: 1.42%

### <a name="blk_6">Block 6 Tensor Group : ~66K Elements</a>

| T_ID | Tensor Layer Name        | Human Friendly Tensor Layer Name               | Elements     | Shape            | Type |
|-----:|:-------------------------|:-----------------------------------------------|:-------------|:-----------------|:-----|
|   56 | blk.6.attn_norm.weight   | Block 6 Attention Normalization (W)            | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   57 | blk.6.ffn_down.weight    | Block 6 Feed-Forward Network "Down" (W)        | (~16K) 16384 | 256 x 64 x 1 x 1 | F16  |
|   58 | blk.6.ffn_gate.weight    | Block 6 Feed-Forward Network "Gate" (W)        | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   59 | blk.6.ffn_up.weight      | Block 6 Feed-Forward Network "Up" (W)          | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   60 | blk.6.ffn_norm.weight    | Block 6 Feed-Forward Network Normalization (W) | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   61 | blk.6.attn_k.weight      | Block 6 Attention Key (W)                      | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   62 | blk.6.attn_output.weight | Block 6 Attention Output (W)                   | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   63 | blk.6.attn_q.weight      | Block 6 Attention Query (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   64 | blk.6.attn_v.weight      | Block 6 Attention Value (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |

- Total elements in blk.6: (~66K) 65664
- Percentage of total elements: 1.42%

### <a name="blk_7">Block 7 Tensor Group : ~66K Elements</a>

| T_ID | Tensor Layer Name        | Human Friendly Tensor Layer Name               | Elements     | Shape            | Type |
|-----:|:-------------------------|:-----------------------------------------------|:-------------|:-----------------|:-----|
|   65 | blk.7.attn_norm.weight   | Block 7 Attention Normalization (W)            | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   66 | blk.7.ffn_down.weight    | Block 7 Feed-Forward Network "Down" (W)        | (~16K) 16384 | 256 x 64 x 1 x 1 | F16  |
|   67 | blk.7.ffn_gate.weight    | Block 7 Feed-Forward Network "Gate" (W)        | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   68 | blk.7.ffn_up.weight      | Block 7 Feed-Forward Network "Up" (W)          | (~16K) 16384 | 64 x 256 x 1 x 1 | F16  |
|   69 | blk.7.ffn_norm.weight    | Block 7 Feed-Forward Network Normalization (W) | ( 64) 64     | 64 x 1 x 1 x 1   | F32  |
|   70 | blk.7.attn_k.weight      | Block 7 Attention Key (W)                      | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   71 | blk.7.attn_output.weight | Block 7 Attention Output (W)                   | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   72 | blk.7.attn_q.weight      | Block 7 Attention Query (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |
|   73 | blk.7.attn_v.weight      | Block 7 Attention Value (W)                    | ( ~4K) 4096  | 64 x 64 x 1 x 1  | F16  |

- Total elements in blk.7: (~66K) 65664
- Percentage of total elements: 1.42%
llama.cpp CHANGED

```diff
@@ -1 +1 @@
-Subproject commit
+Subproject commit 0d2c7321e9678e91b760ebe57f0d063856bb018b
```
llamafile CHANGED

```diff
@@ -1 +1 @@
-Subproject commit
+Subproject commit b3930aa6472f3337d89d0b4399ee6e7afd9b17ad
```
llamafile-creation-legacy.sh DELETED

@@ -1,52 +0,0 @@

```bash
#!/bin/bash

MODEL_DIR="maykeye_tinyllama"
METADATA_FILE="maykeye_tinyllama-metadata.json"

###############################################################################
# Pull both model folder, llamafile (for the engine) and llama.cpp (for the conversion script)
echo == Prep Enviroment ==
git submodule update --init

###############################################################################
echo == Build and prep the llamafile engine execuable ==
pushd llamafile
make -j8
make
# This is where each executables is located for reference purpose for now as of 2024-04-05
# and was determined by running `sudo make install PREFIX=/usr/local`
# ./o/llamafile/zipalign --> /usr/local/bin/zipalign
# ./o/llama.cpp/main/main --> /usr/local/bin/llamafile
# ./o/llama.cpp/imatrix/imatrix --> /usr/local/bin/llamafile-imatrix
# ./o/llama.cpp/quantize/quantize --> /usr/local/bin/llamafile-quantize
# ./build/llamafile-convert --> /usr/local/bin/llamafile-convert
# ./o/llama.cpp/perplexity/perplexity --> /usr/local/bin/llamafile-perplexity
# ./o/llama.cpp/llava/llava-quantize --> /usr/local/bin/llava-quantize
popd

###############################################################################
echo == What is our llamafile name going to be? ==
OUTFILE=$(./llama.cpp/examples/convert-legacy-llama.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --get-outfile)
echo We will be aiming to generate $OUTFILE.llamafile

###############################################################################
echo == Convert from safetensor to gguf ==
./llama.cpp/examples/convert-legacy-llama.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16
mv ${MODEL_DIR}/${OUTFILE}.gguf ${OUTFILE}.gguf

###############################################################################
echo == Generating Llamafile ==
cp ./llamafile/o/llama.cpp/main/main ${OUTFILE}.llamafile

# Create an .args file with settings defaults
cat >.args <<EOF
-m
${OUTFILE}.gguf
EOF

# zip align engine, gguf and default args
./llamafile/o/llamafile/zipalign -j0 ${OUTFILE}.llamafile ${OUTFILE}.gguf .args

###############################################################################
echo == Test Output ==
./${OUTFILE}.llamafile --cli -p "hello world the gruff man said"
```
llamafile-creation.sh CHANGED

```diff
@@ -26,12 +26,15 @@ popd
 
 ###############################################################################
 echo == What is our llamafile name going to be? ==
-OUTFILE=$(./llama.cpp/examples/convert-legacy-llama.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --get-outfile)
+./llama.cpp/convert_hf_to_gguf.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --dry-run 2>/dev/null
+OUTFILE_PATH=$(./llama.cpp/convert_hf_to_gguf.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --dry-run 2>/dev/null)
+OUTFILE_FILE=$(basename ${OUTFILE_PATH})
+OUTFILE="${OUTFILE_FILE%.gguf}"
 echo We will be aiming to generate $OUTFILE.llamafile
 
 ###############################################################################
 echo == Convert from safetensor to gguf ==
-./llama.cpp/examples/convert-legacy-llama.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16
+./llama.cpp/convert_hf_to_gguf.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --verbose
 mv ${MODEL_DIR}/${OUTFILE}.gguf ${OUTFILE}.gguf
 
 ###############################################################################
@@ -48,5 +51,9 @@ EOF
 ./llamafile/o/llamafile/zipalign -j0 ${OUTFILE}.llamafile ${OUTFILE}.gguf .args
 
 ###############################################################################
-echo == Test Output ==
+echo == Test Output ./${OUTFILE}.llamafile ==
 ./${OUTFILE}.llamafile --cli -p "hello world the gruff man said"
+
+###############################################################################
+echo == Useful GGUF Technical Dump ==
+./llama.cpp/gguf-py/scripts/gguf_dump.py --markdown ${OUTFILE}.gguf > ${OUTFILE}.md
```
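For reference, the new name-derivation step just strips the directory and the `.gguf` suffix from whatever path the converter's `--dry-run` prints; a hypothetical run (the printed path below is assumed, not taken from the commit):

```bash
OUTFILE_PATH="maykeye_tinyllama/Maykeye_Tinyllama-4.6M-v0.0-F16.gguf"  # hypothetical --dry-run output
OUTFILE_FILE=$(basename ${OUTFILE_PATH})   # Maykeye_Tinyllama-4.6M-v0.0-F16.gguf
OUTFILE="${OUTFILE_FILE%.gguf}"            # Maykeye_Tinyllama-4.6M-v0.0-F16
echo ${OUTFILE}.llamafile                  # Maykeye_Tinyllama-4.6M-v0.0-F16.llamafile
```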
maykeye_tinyllama-metadata.json CHANGED

```diff
@@ -1,10 +1,16 @@
 {
     "general.name": "TinyLLama",
-    "general.version": "v0.
-    "general.author": "
+    "general.version": "v0.0",
+    "general.author": "Maykeye",
+    "general.quantized_by": "Mofosyne",
     "general.url": "https://huggingface.co/mofosyne/TinyLLama-v0-llamafile",
     "general.description": "This gguf is ported from a first version of Maykeye attempt at recreating roneneldan/TinyStories-1M but using Llama architecture",
     "general.license": "apache-2.0",
+    "general.license_name": "Apache License Version 2.0, January 2004",
+    "general.license_link": "https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md",
     "general.source.url": "https://huggingface.co/Maykeye/TinyLLama-v0",
-    "general.source.
+    "general.source.source_repo_url": "https://huggingface.co/Maykeye/TinyLLama-v0",
+    "general.tags": ["text generation", "transformer", "llama", "tiny", "tiny model"],
+    "general.languages": ["en"],
+    "general.datasets": ["https://huggingface.co/datasets/roneneldan/TinyStories/blob/main/TinyStoriesV2-GPT4-train.txt", "https://huggingface.co/datasets/roneneldan/TinyStories/blob/main/TinyStoriesV2-GPT4-valid.txt"]
 }
```
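This JSON is the override file consumed by the converter's `--metadata` flag; its `general.*` keys take the place of whatever the converter would otherwise derive from the model directory, which is how the commit gets the authored name, version, and license into the GGUF dump above. The invocation, as it appears in llamafile-creation.sh:

```bash
./llama.cpp/convert_hf_to_gguf.py maykeye_tinyllama \
    --metadata maykeye_tinyllama-metadata.json \
    --outtype f16
```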