TinyLLama-v0-5M-F16-llamafile / llamafile-creation.sh
mofosyne's picture
streamlined
707f408
raw
history blame
705 Bytes
#!/bin/sh
# Build TinyLLama-v0-5M-F16.llamafile:
#   1. fetch the model + llama.cpp via git submodules,
#   2. convert the safetensor weights to GGUF,
#   3. bundle the GGUF and default args into a copy of the llamafile engine.
# Prerequisites: git, python (for llama.cpp/convert.py), zipalign,
# and a llamafile binary at /usr/local/bin/llamafile.
#
# Abort on the first failed command or use of an unset variable, so a failed
# conversion/copy can't feed a stale or missing GGUF into zipalign below.
# (set -eu is the POSIX-portable subset; -o pipefail isn't guaranteed in sh
# and no pipelines are used here.)
set -eu

# Pull both the model folder and llama.cpp (for the conversion script)
git submodule update --init

# Convert from safetensor to gguf
# (Assuming llama.cpp is in the next folder)
./llama.cpp/convert.py maykeye_tinyllama --metadata maykeye_tinyllama-metadata.json

# Copy the generated gguf to this folder
cp maykeye_tinyllama/TinyLLama-v0-5M-F16.gguf TinyLLama-v0-5M-F16.gguf

# Get the llamafile engine (the output artifact starts as a copy of it)
cp /usr/local/bin/llamafile TinyLLama-v0-5M-F16.llamafile

# Create an .args file with settings defaults; these become the baked-in
# default command line of the finished llamafile (-m = model to load).
cat >.args <<EOF
-m
TinyLLama-v0-5M-F16.gguf
EOF

# Combine: embed the GGUF weights and .args into the llamafile's zip payload.
# -j0 stores entries uncompressed so the weights can be mapped directly.
zipalign -j0 \
  TinyLLama-v0-5M-F16.llamafile \
  TinyLLama-v0-5M-F16.gguf \
  .args

# Test — run the finished, self-contained llamafile
./TinyLLama-v0-5M-F16.llamafile