#!/usr/bin/env bash
# Build a self-contained llamafile from the Maykeye TinyLLama model:
# convert safetensors -> GGUF, then embed the weights and default args
# into a copy of the llamafile engine.
#
# Requires: git, python (for llama.cpp's convert script), zipalign,
# and llamafile installed at /usr/local/bin/llamafile.
set -euo pipefail

# Pull both the model folder and llama.cpp (for the conversion script)
git submodule update --init

# Convert from safetensor to gguf
# (Assuming llama.cpp is in the next folder)
./llama.cpp/convert.py maykeye_tinyllama --metadata maykeye_tinyllama-metadata.json

# Copy the generated gguf to this folder
cp -- maykeye_tinyllama/TinyLLama-v0-5M-F16.gguf TinyLLama-v0-5M-F16.gguf

# Get the llamafile engine (the executable we will append the model to)
cp -- /usr/local/bin/llamafile TinyLLama-v0-5M-F16.llamafile

# Create an .args file with settings defaults
# (quoted delimiter: the payload is literal, no shell expansion)
cat >.args <<'EOF'
-m
TinyLLama-v0-5M-F16.gguf
EOF

# Combine: zip the gguf and .args into the llamafile executable
# (-j junks paths, -0 stores uncompressed so the engine can mmap the weights)
zipalign -j0 \
  TinyLLama-v0-5M-F16.llamafile \
  TinyLLama-v0-5M-F16.gguf \
  .args

# Test
./TinyLLama-v0-5M-F16.llamafile