Paul Dufour
update non-slim versions of models
e3b994d
# Recipes rely on bash-only syntax (e.g. the C-style for loops in progress_bar).
SHELL := /bin/bash

# Every explicit target in this file is a command, not a file to build.
# quantize-% is a pattern rule and therefore cannot be declared here.
.PHONY: all-in-one slim quantize clean-large-files fix-gpu-buffers \
        export-merged-source-models \
        export-merged-source-models-first-pass \
        export-merged-source-models-second-pass \
        export-abcd export-e export

# Configuration variables
# Simple (:=) assignment so $(abspath ...) is evaluated once at parse time;
# command-line overrides (make NATIVE_ANDROID=...) still propagate to the
# derived paths below because they are applied before this file is read.
NATIVE_ANDROID       := $(abspath ../Native-LLM-for-Android)
QWEN_VL_DIR          := $(NATIVE_ANDROID)/Export_ONNX/QwenVL
ONNX_SRC_DIR         := $(QWEN_VL_DIR)/onnx
ONNX_DEST_DIR        := $(QWEN_VL_DIR)/onnx-dist
STAGING_DIR          := /tmp/transformers.js/staging
# Absolute, like NATIVE_ANDROID, so the value stays valid after a recipe has
# changed directory.
TRANSFORMERS_JS_PATH := $(abspath ../transformers.js)
ONNX_TOOLS_PATH      := $(NATIVE_ANDROID)/ONNX_Tools

# Python paths from venvs
NATIVE_PYTHON        := $(NATIVE_ANDROID)/.venv/bin/python3
TRANSFORMERS_PYTHON  := $(TRANSFORMERS_JS_PATH)/.venv/bin/python3

# Model parts
PARTS := A B C D E
# progress_bar: shell snippet that redraws a 20-column progress bar in place.
#   $(1) - number of items handled so far
#   $(2) - total number of items (used as a divisor, so it must be non-zero)
#   $(3) - label printed after "Processing:"
# Needs bash (C-style for loops). The output ends with a carriage return and
# no newline, so the next thing printed overwrites the bar.
define progress_bar
_filled=$$(($(1) * 20 / $(2))); \
_empty=$$((20 - _filled)); \
printf "\r Progress: \033[1;32m["; \
for ((_k=0; _k<_filled; _k++)); do printf "="; done; \
printf "\033[0m"; \
for ((_k=0; _k<_empty; _k++)); do printf " "; done; \
printf "\033[1;32m]\033[0m $(1)/$(2) Processing: \033[1;34m%s\033[K\033[0m\r" "$(3)"
endef
# The original export scripts write a large number of separate tensor files;
# these two passes merge them so each model ends up as one file, or as a model
# file plus a single external-data file.
# See https://github.com/pytorch/pytorch/issues/94280#issuecomment-2089196400
#
# The second pass reads what the first pass wrote, so the passes are run as
# sequential sub-makes instead of being listed as prerequisites: prerequisite
# order is not guaranteed under `make -j`.
export-merged-source-models:
	@$(MAKE) export-merged-source-models-first-pass
	@$(MAKE) export-merged-source-models-second-pass
	@echo "βœ… Exporting merged source models complete"
# First pass: load every *.onnx found under $(ONNX_SRC_DIR) and re-save it into
# $(ONNX_DEST_DIR) with all tensors stored in one external-data file named
# "<model>.onnx.data".
#
# The source path and destination directory are handed to Python through
# sys.argv instead of being pasted into the Python source, so a quote or
# backslash in a path can no longer break (or inject into) the snippet.
#
# An existing "<model>.onnx.data" left by an earlier run is deleted before
# saving. NOTE(review): onnx appears to append to an existing external-data
# file rather than truncate it - confirm against the installed onnx version;
# deleting the stale file first is harmless either way.
#
# Limitation: the file list is word-split by the shell, so paths containing
# whitespace are not supported.
export-merged-source-models-first-pass:
	@echo "πŸ’Ύ First pass: Export all models with merged tensors..."
	@mkdir -p $(ONNX_DEST_DIR)
	@files=$$(find $(ONNX_SRC_DIR) -name "*.onnx"); \
	total=$$(echo "$$files" | wc -w | tr -d ' '); \
	echo "Files found (first pass): $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		$(NATIVE_PYTHON) -u -c "import onnx, os, sys; src, dest_dir = sys.argv[1:3]; \
			m = onnx.load(src); \
			d = os.path.join(dest_dir, os.path.basename(src)); \
			stale = d + '.data'; \
			os.path.exists(stale) and os.remove(stale); \
			onnx.save_model(m, d, all_tensors_to_one_file=True, save_as_external_data=True, location=os.path.basename(d)+'.data')" \
			"$$item" "$(ONNX_DEST_DIR)" || exit 1; \
	done; \
	echo "βœ… Done first pass"
# Second pass: for every *.onnx found under $(ONNX_DEST_DIR), add up the size
# of the model file and its "<model>.onnx.data" sidecar (if present).
#   - total > 2e9 bytes: re-save with a single external-data file.
#   - otherwise: re-save as one self-contained file and delete the sidecar.
#
# Fix: the Python snippet used `dest_dir` without ever defining it, so every
# invocation died with a NameError. The destination directory is now passed in
# through sys.argv together with the source path, mirroring the first pass.
#
# NOTE(review): for models that stay external, the model is saved back onto
# its existing ".data" file; onnx appears to append to an existing
# external-data file rather than truncate it - confirm whether that file grows
# on each run.
#
# Limitation: the file list is word-split by the shell, so paths containing
# whitespace are not supported.
export-merged-source-models-second-pass:
	@echo "πŸ’Ύ Second pass: Converting large models to external data format..."
	@files=$$(find $(ONNX_DEST_DIR) -name "*.onnx"); \
	total=$$(echo "$$files" | wc -w | tr -d ' '); \
	echo "Files found (second pass): $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		$(NATIVE_PYTHON) -c "import onnx, os, sys; src, dest_dir = sys.argv[1:3]; \
			data = src + '.data'; \
			total_size = os.path.getsize(src) + (os.path.getsize(data) if os.path.exists(data) else 0); \
			needs_external = total_size > 2e9; \
			d = os.path.join(dest_dir, os.path.basename(src)); \
			onnx.save_model( \
				onnx.load(src), \
				d, \
				save_as_external_data=needs_external, \
				all_tensors_to_one_file=True, \
				location=(os.path.basename(src) + '.data') if needs_external else None \
			); \
			not needs_external and os.path.exists(data) and os.remove(data)" \
			"$$item" "$(ONNX_DEST_DIR)" || exit 1; \
	done; \
	echo "βœ… Done second models"
# Full pipeline: export -> quantize -> clean-large-files -> fix-gpu-buffers ->
# export-merged-source-models.
#
# Each stage works on files produced by the stage before it, so the stages are
# run as sequential sub-makes. Listing them as prerequisites only yields this
# order in a serial build; under `make -j` they could start concurrently.
# Any failing stage stops the pipeline.
#
# Note: the closing message mentions slimming, but the `slim` target is not
# part of this pipeline.
all-in-one:
	@$(MAKE) export
	@$(MAKE) quantize
	@$(MAKE) clean-large-files
	@$(MAKE) fix-gpu-buffers
	@$(MAKE) export-merged-source-models
	@echo "✨ All done! ONNX models exported, slimmed, quantized and fixed"
# Aggregate target: builds export-abcd and export-e, then reports success.
export: export-abcd \
        export-e
	@echo "βœ… Export complete"
# Run QwenVL_Export_ABCD.py from inside $(QWEN_VL_DIR) for the
# Qwen/Qwen2-VL-2B-Instruct model.
# The directory comes from QWEN_VL_DIR rather than a hard-coded relative path,
# so overriding NATIVE_ANDROID / QWEN_VL_DIR is honoured here as well.
export-abcd:
	@echo "πŸš€ Exporting parts A, B, C, D..."
	cd $(QWEN_VL_DIR) && \
		$(NATIVE_PYTHON) QwenVL_Export_ABCD.py "Qwen/Qwen2-VL-2B-Instruct"
# Run QwenVL_Export_E.py from inside $(QWEN_VL_DIR) for the
# Qwen/Qwen2-VL-2B-Instruct model.
# The directory comes from QWEN_VL_DIR rather than a hard-coded relative path,
# so overriding NATIVE_ANDROID / QWEN_VL_DIR is honoured here as well.
export-e:
	@echo "πŸš€ Exporting part E..."
	cd $(QWEN_VL_DIR) && \
		$(NATIVE_PYTHON) QwenVL_Export_E.py "Qwen/Qwen2-VL-2B-Instruct"
# Run onnxslim on every regular *.onnx file under $(ONNX_SRC_DIR) except
# QwenVL_E.onnx, writing each result back over its input file.
# Stops at the first file onnxslim fails on. The file list is word-split by
# the shell, so paths containing whitespace are not supported.
slim:
	@echo "πŸ—œοΈ Slimming ONNX models..."
	@models=$$(find $(ONNX_SRC_DIR) -name "*.onnx" -type f ! -name "QwenVL_E.onnx"); \
	count=$$(echo "$$models" | wc -w | tr -d ' '); \
	echo "Files found: $$count"; \
	index=0; \
	for model in $$models; do \
		index=$$((index + 1)); \
		$(call progress_bar,$$index,$$count,$$model); \
		if ! onnxslim --verbose "$$model" "$$model"; then exit 1; fi; \
	done; \
	echo "βœ… Slimming complete"
# Quantize every part listed in $(PARTS), one sub-make (quantize-<part>) at a
# time, aborting as soon as one of them fails.
quantize:
	@echo "⚑ Starting quantization..."
	for model_part in $(PARTS); do \
		if ! $(MAKE) quantize-$$model_part; then exit 1; fi; \
	done
	@echo "βœ… Quantization complete"
# Quantize one model part; the stem $* is the part letter (see PARTS).
#
# Steps, all run from inside $(TRANSFORMERS_JS_PATH):
#   1. Empty $(STAGING_DIR) and symlink everything from $(ONNX_SRC_DIR) into it.
#   2. Drop staged files matching *_*_*.onnx / *_*_*.onnx_data, then every
#      *.onnx that does not match QwenVL_<part>*.onnx.
#   3. Run scripts.quantize in q4f16 mode, writing to $(ONNX_DEST_DIR).
# Part A additionally passes --op_block_list for Conv, DynamicQuantizeLinear,
# DequantizeLinear and Resize (presumably excluding those ops from
# quantization - see scripts.quantize).
#
# Fixes over the previous recipe:
#   - The staging steps were chained with && but the chain was terminated by
#     ';', so a failed cd/rm/ln/find did not stop the quantizer from running
#     (possibly in the wrong directory or on stale staging content).
#     `set -e` now aborts on the first failing step.
#   - PYTHONPATH was the relative $(TRANSFORMERS_JS_PATH) evaluated *after* the
#     cd, which only resolves correctly for a single-level "../name" path.
#     It is now the directory we actually changed into.
#   - Guard against an empty STAGING_DIR turning `rm -f $(STAGING_DIR)/*`
#     into `rm -f /*`.
quantize-%:
	@: $(if $(strip $(STAGING_DIR)),,$(error STAGING_DIR must not be empty))
	@echo "⚑ Quantizing part $*..."
	mkdir -p $(ONNX_DEST_DIR)
	set -e; \
	cd $(TRANSFORMERS_JS_PATH); \
	mkdir -p $(STAGING_DIR); \
	rm -f $(STAGING_DIR)/*; \
	ln -sf $$(realpath $(ONNX_SRC_DIR))/* $(STAGING_DIR)/; \
	find $(STAGING_DIR) -name "*_*_*.onnx_data" -delete; \
	find $(STAGING_DIR) -name "*_*_*.onnx" -delete; \
	find $(STAGING_DIR) -name "*.onnx" ! -name "QwenVL_$**.onnx" -delete; \
	EXTRA_FLAGS=""; \
	if [ "$*" = "A" ]; then EXTRA_FLAGS="--op_block_list Conv DynamicQuantizeLinear DequantizeLinear Resize"; fi; \
	echo "Extra Flags for part $*: $$EXTRA_FLAGS"; \
	PYTHONPATH="$$PWD" .venv/bin/python3 -m scripts.quantize \
		--input_folder '$(STAGING_DIR)' \
		--output_folder '$(ONNX_DEST_DIR)' \
		--mode q4f16 $$EXTRA_FLAGS
# Delete every *.onnx model under $(ONNX_DEST_DIR) whose size, counted together
# with its external-data sidecar, is at least 2147483648 bytes (2 GiB).
# The sidecar is "<model>.data" if that exists, otherwise "<model>_data" if
# that exists; when a model is removed both sidecar names are removed with it.
#
# Sizes are read with `wc -c < file` instead of `stat -f %z`: the latter is
# BSD/macOS-only syntax (GNU stat treats -f as "report file system status"),
# so the recipe previously failed on Linux.
#
# Limitation: the file list is word-split by the shell, so paths containing
# whitespace are not supported.
clean-large-files:
	@echo "🧹 Removing ONNX files over 2GB..."
	cd $(ONNX_DEST_DIR) && \
	for f in $$(find . -name "*.onnx" -type f); do \
		sidecar=""; \
		if [ -f "$${f}.data" ]; then \
			sidecar="$${f}.data"; \
		elif [ -f "$${f}_data" ]; then \
			sidecar="$${f}_data"; \
		fi; \
		total_size=$$(( $$(wc -c < "$$f") )); \
		if [ -n "$$sidecar" ]; then \
			total_size=$$(( total_size + $$(wc -c < "$$sidecar") )); \
		fi; \
		if [ $$total_size -ge 2147483648 ]; then \
			size_mb=$$(( total_size / 1048576 )); \
			echo " Removing $$f (size: $$size_mb MB)..."; \
			rm -f "$$f" "$${f}.data" "$${f}_data"; \
		fi; \
	done
	@echo "βœ… Large file cleanup complete"
# Run clamp_for_gpu_buffers.py --overwrite on every QwenVL_E_*.onnx file found
# under $(ONNX_DEST_DIR), stopping at the first failure.
#
# The interpreter and script locations now come from NATIVE_PYTHON and
# ONNX_TOOLS_PATH instead of hard-coded ".venv/bin/python3" / "ONNX_Tools/",
# so overriding those variables takes effect here too. The script still runs
# with $(NATIVE_ANDROID) as its working directory; the cd is done once, before
# the loop, rather than on every iteration.
#
# Limitation: the file list is word-split by the shell, so paths containing
# whitespace are not supported.
fix-gpu-buffers:
	@echo "πŸ”§ Fixing GPU buffers for E models..."
	@files=$$(find $(ONNX_DEST_DIR) -name "QwenVL_E_*.onnx" -type f); \
	total=$$(echo "$$files" | wc -w | tr -d ' '); \
	echo "Files found: $$total"; \
	cd $(NATIVE_ANDROID) || exit 1; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		$(NATIVE_PYTHON) $(ONNX_TOOLS_PATH)/clamp_for_gpu_buffers.py --overwrite "$$item" || exit 1; \
	done; \
	echo "βœ… GPU buffer fixes complete"