# Recipes rely on bash features (C-style `for ((...))` loops, `$(( ))`).
SHELL := /bin/bash

# Configuration variables
NATIVE_ANDROID = $(abspath ../Native-LLM-for-Android)
QWEN_VL_DIR = $(NATIVE_ANDROID)/Export_ONNX/QwenVL
ONNX_SRC_DIR = $(QWEN_VL_DIR)/onnx
ONNX_DEST_DIR = $(QWEN_VL_DIR)/onnx-dist
STAGING_DIR = /tmp/transformers.js/staging
TRANSFORMERS_JS_PATH = ../transformers.js
ONNX_TOOLS_PATH = $(NATIVE_ANDROID)/ONNX_Tools

# Python paths from venvs
NATIVE_PYTHON = $(NATIVE_ANDROID)/.venv/bin/python3
TRANSFORMERS_PYTHON = $(TRANSFORMERS_JS_PATH)/.venv/bin/python3

# Model parts
PARTS = A B C D E

# None of these targets produce a file with the target's name, so declare them
# phony. `quantize-%` is deliberately not listed: phony targets skip implicit
# (pattern) rule search, which would disable the pattern rule below.
.PHONY: export-merged-source-models \
	export-merged-source-models-first-pass \
	export-merged-source-models-second-pass \
	all-in-one export export-abcd export-e slim quantize \
	clean-large-files fix-gpu-buffers

# progress_bar: shell fragment that draws a 20-column progress bar.
#   $1 - number of items processed so far
#   $2 - total number of items
#   $3 - label printed after "Processing:"
# Meant to be spliced into a recipe loop via $(call progress_bar,...); the
# arguments are usually shell variables passed as $$name.
define progress_bar
	printf "\r Progress: \033[1;32m["; \
	_done=$$(($1 * 20 / $2)); \
	for ((i=0; i<_done; i++)); do printf "="; done; \
	printf "\033[0m"; \
	_left=$$((20 - _done)); \
	for ((i=0; i<_left; i++)); do printf " "; done; \
	printf "\033[1;32m]\033[0m $1/$2 Processing: \033[1;34m%s\033[K\033[0m\r" "$3"
endef

# See https://github.com/pytorch/pytorch/issues/94280#issuecomment-2089196400
# Original export scripts export a bunch of tensor files, so we merge into one / two files instead.
export-merged-source-models: export-merged-source-models-first-pass export-merged-source-models-second-pass
	@echo "✅ Exporting merged source models complete"

# First pass: load every *.onnx under ONNX_SRC_DIR and save it into
# ONNX_DEST_DIR with all tensors merged into one "<name>.onnx.data" file.
export-merged-source-models-first-pass:
	@echo "💾 First pass: Export all models with merged tensors..."
	@mkdir -p $(ONNX_DEST_DIR)
	@files=`find $(ONNX_SRC_DIR) -name "*.onnx"`; \
	total=`echo "$$files" | wc -w | tr -d ' '`; \
	echo "Files found (first pass): $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		$(NATIVE_PYTHON) -u -c "import onnx, os, sys; src='$$item'; dest_dir='$(ONNX_DEST_DIR)'; \
		  m = onnx.load(src); \
		  d = os.path.join(dest_dir, os.path.basename(src)); \
		  onnx.save_model(m, d, all_tensors_to_one_file=True, save_as_external_data=True, location=os.path.basename(d)+'.data')" || exit 1; \
	done; \
	echo "✅ Done first pass"

# Second pass: for every *.onnx under ONNX_DEST_DIR, keep external data only
# when model + "<name>.onnx.data" exceed 2e9 bytes; otherwise re-save the
# model without external data and delete the now-unneeded .data file.
# NOTE(review): in the external-data case the model is re-saved over the .data
# file written by the first pass - confirm onnx does not append to an existing
# external data file.
export-merged-source-models-second-pass:
	@echo "💾 Second pass: Converting large models to external data format..."
	@files=`find $(ONNX_DEST_DIR) -name "*.onnx"`; \
	total=`echo "$$files" | wc -w | tr -d ' '`; \
	echo "Files found (second pass): $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		$(NATIVE_PYTHON) -c 'import onnx, os, sys; \
		  src = """'"$$item"'"""; \
		  dest_dir = "$(ONNX_DEST_DIR)"; \
		  total_size = os.path.getsize(src); \
		  d = os.path.join(dest_dir, os.path.basename(src)); \
		  total_size += os.path.getsize(src + ".data") if os.path.exists(src + ".data") else 0; \
		  needs_external = total_size > 2e9; \
		  onnx.save_model( \
		    onnx.load(src), \
		    d, \
		    save_as_external_data=needs_external, \
		    all_tensors_to_one_file=True, \
		    location=(os.path.basename(src) + ".data") if needs_external else None \
		  ); \
		  not needs_external and os.path.exists(src + ".data") and os.remove(src + ".data")' || exit 1; \
	done; \
	echo "✅ Done second pass"

# Full pipeline. The prerequisites are only guaranteed to run in the listed
# order under serial make (no -j).
# NOTE(review): the message mentions "slimmed" but `slim` is not a
# prerequisite here - confirm whether that is intentional.
all-in-one: export quantize clean-large-files fix-gpu-buffers export-merged-source-models
	@echo "✨ All done! ONNX models exported, slimmed, quantized and fixed"

export: export-abcd export-e
	@echo "✅ Export complete"

# Run the upstream export script for parts A-D from inside QWEN_VL_DIR.
export-abcd:
	@echo "🚀 Exporting parts A, B, C, D..."
	cd $(QWEN_VL_DIR) && \
	$(NATIVE_PYTHON) QwenVL_Export_ABCD.py "Qwen/Qwen2-VL-2B-Instruct"

# Run the upstream export script for part E from inside QWEN_VL_DIR.
export-e:
	@echo "🚀 Exporting part E..."
	cd $(QWEN_VL_DIR) && \
	$(NATIVE_PYTHON) QwenVL_Export_E.py "Qwen/Qwen2-VL-2B-Instruct"

# Run onnxslim in place on every source model except QwenVL_E.onnx.
slim:
	@echo "🗜️ Slimming ONNX models..."
	@files=`find $(ONNX_SRC_DIR) -name "*.onnx" -type f ! -name "QwenVL_E.onnx"`; \
	total=`echo "$$files" | wc -w | tr -d ' '`; \
	echo "Files found: $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		onnxslim --verbose "$$item" "$$item" || exit 1; \
	done; \
	echo "✅ Slimming complete"

# Quantize every part listed in PARTS, one sub-make per part.
quantize:
	@echo "⚡ Starting quantization..."
	for part in $(PARTS); do \
		$(MAKE) quantize-$$part || exit 1; \
	done
	@echo "✅ Quantization complete"

# Quantize one part ($* is the part letter). The staging directory is refilled
# with symlinks to the source models, then pruned so that only QwenVL_<part>*
# models remain before scripts.quantize runs from TRANSFORMERS_JS_PATH.
# EXTRA_FLAGS is computed first so that everything from `cd` onwards is a
# single && chain: a failing step stops the recipe instead of letting the
# quantizer run from the wrong directory or on a stale staging directory.
quantize-%:
	@echo "⚡ Quantizing part $*..."
	mkdir -p $(ONNX_DEST_DIR)
	EXTRA_FLAGS=""; \
	if [ "$*" = "A" ]; then EXTRA_FLAGS="--op_block_list Conv DynamicQuantizeLinear DequantizeLinear Resize"; fi; \
	cd $(TRANSFORMERS_JS_PATH) && \
	mkdir -p $(STAGING_DIR) && \
	rm -f $(STAGING_DIR)/* && \
	ln -sf $$(realpath $(ONNX_SRC_DIR))/* $(STAGING_DIR)/ && \
	find $(STAGING_DIR) -name "*_*_*.onnx_data" -delete && \
	find $(STAGING_DIR) -name "*_*_*.onnx" -delete && \
	find $(STAGING_DIR) -name "*.onnx" ! -name "QwenVL_$**.onnx" -delete && \
	echo "Extra Flags for part $*: $$EXTRA_FLAGS" && \
	PYTHONPATH=$(TRANSFORMERS_JS_PATH) .venv/bin/python3 -m scripts.quantize \
		--input_folder '$(STAGING_DIR)' \
		--output_folder '$(ONNX_DEST_DIR)' \
		--mode q4f16 $$EXTRA_FLAGS

# Delete every model in ONNX_DEST_DIR whose size, including its ".data" or
# "_data" companion file, is at least 2147483648 bytes (2 GiB).
# Sizes come from `wc -c`, which works with both GNU and BSD userlands
# (`stat -f %z` is BSD-only); wrapping the result in $(( )) also discards the
# padding some wc implementations print.
clean-large-files:
	@echo "🧹 Removing ONNX files over 2GB..."
	cd $(ONNX_DEST_DIR) || exit 1; \
	fsize() { wc -c < "$$1"; }; \
	for f in $$(find . -name "*.onnx" -type f); do \
		total_size=0; \
		if [ -f "$$f"".data" ]; then \
			total_size=$$(( $$(fsize "$$f") + $$(fsize "$$f"".data") )); \
		elif [ -f "$$f""_data" ]; then \
			total_size=$$(( $$(fsize "$$f") + $$(fsize "$$f""_data") )); \
		else \
			total_size=$$(( $$(fsize "$$f") )); \
		fi; \
		size_mb=$$(( total_size / 1048576 )); \
		if [ $$total_size -ge 2147483648 ]; then \
			echo " Removing $$f (size: $$size_mb MB)..."; \
			rm -f "$$f" "$$f"".data" "$$f""_data"; \
		fi; \
	done
	@echo "✅ Large file cleanup complete"

# Run clamp_for_gpu_buffers.py --overwrite on every QwenVL_E_*.onnx model in
# ONNX_DEST_DIR, from inside the NATIVE_ANDROID checkout.
fix-gpu-buffers:
	@echo "🔧 Fixing GPU buffers for E models..."
	@files=`find $(ONNX_DEST_DIR) -name "QwenVL_E_*.onnx" -type f`; \
	total=`echo "$$files" | wc -w | tr -d ' '`; \
	echo "Files found: $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		cd $(NATIVE_ANDROID) && .venv/bin/python3 ONNX_Tools/clamp_for_gpu_buffers.py --overwrite "$$item" || exit 1; \
	done; \
	echo "✅ GPU buffer fixes complete"