|
# Recipes run under bash rather than /bin/sh, and `-e` makes a recipe stop
# at the first failing simple command.
SHELL := /bin/bash
.SHELLFLAGS := -e -c
|
|
|
|
|
# --- Locations --------------------------------------------------------------
# All paths are relative to the directory make is run from; both repositories
# are addressed as sibling directories.

# Root of the Native-LLM-for-Android repository and the QwenVL export folder in it.
NATIVE_ANDROID := ../Native-LLM-for-Android
QWEN_VL_DIR    := $(NATIVE_ANDROID)/Export_ONNX/QwenVL

# ONNX models read by `slim` and `quantize-%`, and the folder `quantize-%`
# writes its results to.
ONNX_SRC_DIR  := $(QWEN_VL_DIR)/onnx
ONNX_DEST_DIR := $(QWEN_VL_DIR)/onnx-dist

# Scratch folder that `quantize-%` fills with symlinks to the source models.
STAGING_DIR := /tmp/transformers.js/staging

# Root of the transformers.js repository (provides `scripts.quantize`).
TRANSFORMERS_JS_PATH := ../transformers.js

# Helper-script folder inside the Native-LLM-for-Android repository.
ONNX_TOOLS_PATH := $(NATIVE_ANDROID)/ONNX_Tools

# --- Interpreters -----------------------------------------------------------
# Virtualenv interpreters of the two repositories.
NATIVE_PYTHON       := $(NATIVE_ANDROID)/.venv/bin/python3
TRANSFORMERS_PYTHON := $(TRANSFORMERS_JS_PATH)/.venv/bin/python3

# --- Model parts ------------------------------------------------------------
# One `quantize-<part>` sub-make is run per entry.
PARTS := A B C D E
|
|
|
# progress_bar -- shell fragment that runs a command once per item while
# drawing a 20-column progress bar.
#
#   $(1)  whitespace-separated list of items (callers pass a shell variable
#         reference such as $$files); it is expanded unquoted, so items must
#         not contain whitespace.
#   $(2)  command template; every `{}` is replaced by the current item. The
#         substitution goes through sed with `|` as delimiter, so items must
#         not contain `|`, `&` or backslashes.
#
# Use as:  $(call progress_bar,<items>,<command template>)  inside a recipe
# line. The fragment is a single logical shell line, so it can follow earlier
# commands joined with `; \`. It needs bash because of `for ((...))`.
#
# The item and the counters are handed to printf as `%s` arguments instead of
# being spliced into the format string, so a `%` or backslash in a path
# cannot corrupt the output.
define progress_bar
	total=$$(echo $(1) | wc -w | tr -d ' '); \
	current=0; \
	for item in $(1); do \
		current=$$((current + 1)); \
		printf "\r Progress: \033[1;32m["; \
		for ((i=0; i<current*20/total; i++)); do printf "="; done; \
		printf "\033[0m"; \
		for ((i=current*20/total; i<20; i++)); do printf " "; done; \
		printf "\033[1;32m]\033[0m %s/%s " "$$current" "$$total"; \
		printf "\033[1;34m%s\033[K\033[0m\n" "$$item"; \
		cmd="$(2)"; \
		cmd=$$(echo "$$cmd" | sed "s|{}|$$item|g"); \
		$$cmd; \
	done; \
	printf "\n"
endef
|
|
|
# Command-style targets: none of them names a file that gets built.
# `export-abcd` and `export-e` were missing from the list.
# `quantize-%` is deliberately not listed: .PHONY does not understand
# patterns, and naming the concrete quantize-A..E targets here would make
# make skip the implicit-rule search that finds the `quantize-%` rule.
# `clean` is kept from the original list although no rule for it is defined
# in this part of the file.
.PHONY: all all-in-one clean clean-large-files export export-abcd export-e \
        fix-gpu-buffers quantize slim

# `all` was declared phony but had no rule, so `make all` silently did
# nothing. Make it the conventional alias for the full pipeline.
all: all-in-one
|
|
|
# Full pipeline: export, quantize, drop over-sized outputs, slim the source
# models, then patch the E models for GPU buffers.
# The stages are run as sequential sub-makes instead of being listed as
# prerequisites: each stage consumes files written by an earlier one, and
# prerequisite order is not guaranteed under `make -j`.
all-in-one:
	$(MAKE) export
	$(MAKE) quantize
	$(MAKE) clean-large-files
	$(MAKE) slim
	$(MAKE) fix-gpu-buffers
	@echo "✨ All done! ONNX models exported, slimmed, quantized and fixed"
|
|
|
# Export every model part: A-D via export-abcd, E via export-e.
export: export-abcd export-e
	@echo "✅ Export complete"
|
|
|
# Run QwenVL_Export_ABCD.py for Qwen/Qwen2-VL-2B-Instruct from inside the
# QwenVL export folder.
# The folder and interpreter come from QWEN_VL_DIR / NATIVE_PYTHON rather
# than hard-coded copies of the same paths, so overriding NATIVE_ANDROID also
# applies here. The interpreter path is made absolute because the recipe
# changes directory before invoking it.
export-abcd:
	@echo "🚀 Exporting parts A, B, C, D..."
	cd "$(QWEN_VL_DIR)" && \
	"$(abspath $(NATIVE_PYTHON))" QwenVL_Export_ABCD.py "Qwen/Qwen2-VL-2B-Instruct"
|
|
|
# Run QwenVL_Export_E.py for Qwen/Qwen2-VL-2B-Instruct from inside the QwenVL
# export folder.
# The folder and interpreter come from QWEN_VL_DIR / NATIVE_PYTHON rather
# than hard-coded copies of the same paths, so overriding NATIVE_ANDROID also
# applies here. The interpreter path is made absolute because the recipe
# changes directory before invoking it.
export-e:
	@echo "🚀 Exporting part E..."
	cd "$(QWEN_VL_DIR)" && \
	"$(abspath $(NATIVE_PYTHON))" QwenVL_Export_E.py "Qwen/Qwen2-VL-2B-Instruct"
|
|
|
# Run `onnxslim --verbose <file> <file>` (in place) on every *.onnx file under
# ONNX_SRC_DIR except QwenVL_E.onnx, reporting progress per file.
# The file list is expanded unquoted by progress_bar, so paths must not
# contain whitespace.
slim:
	@echo "🗜️ Slimming ONNX models..."
	@files=$$(find $(ONNX_SRC_DIR) -name "*.onnx" -type f ! -name "QwenVL_E.onnx"); \
	$(call progress_bar,$$files,onnxslim --verbose {} {})
	@echo "✅ Slimming complete"
|
|
|
# Quantize every part in PARTS, one sub-make at a time.
# The parts are processed strictly one after another because every
# `quantize-<part>` run empties and refills the same STAGING_DIR; the first
# failing part aborts the loop.
quantize:
	@echo "⚡ Starting quantization..."
	for part in $(PARTS); do \
		$(MAKE) quantize-$$part || exit 1; \
	done
	@echo "✅ Quantization complete"
|
|
|
# Quantize one model part; the stem ($*) is the part letter, e.g. quantize-A.
#
# Steps, all run from inside TRANSFORMERS_JS_PATH:
#   1. empty STAGING_DIR and symlink every entry of ONNX_SRC_DIR into it;
#   2. delete staged *.onnx / *.onnx_data entries whose names contain two or
#      more underscores, then every staged *.onnx not matching QwenVL_<part>*;
#   3. run `scripts.quantize` in q4f16 mode from STAGING_DIR into
#      ONNX_DEST_DIR. Part A additionally passes an --op_block_list.
#
# The relative ONNX_SRC_DIR / ONNX_DEST_DIR / PYTHONPATH values are used after
# the `cd`; they still resolve because both repositories are addressed as
# siblings (`../<name>`) of the directory make runs in.
#
# The setup steps and the quantizer invocation form ONE `&&` chain, with the
# flag selection grouped in braces. Previously the chain ended in `;`, and
# `-e` does not abort on a failure in the middle of an `&&` list, so a failed
# cd/ln/find was ignored and quantization ran anyway.
quantize-%:
	@echo "⚡ Quantizing part $*..."
	mkdir -p $(ONNX_DEST_DIR)
	cd $(TRANSFORMERS_JS_PATH) && \
	mkdir -p $(STAGING_DIR) && \
	rm -f $(STAGING_DIR)/* && \
	ln -sf $$(realpath $(ONNX_SRC_DIR))/* $(STAGING_DIR)/ && \
	find $(STAGING_DIR) -name "*_*_*.onnx_data" -delete && \
	find $(STAGING_DIR) -name "*_*_*.onnx" -delete && \
	find $(STAGING_DIR) -name "*.onnx" ! -name "QwenVL_$**.onnx" -delete && \
	{ \
		EXTRA_FLAGS=""; \
		if [ "$*" = "A" ]; then EXTRA_FLAGS="--op_block_list Conv DynamicQuantizeLinear DequantizeLinear Resize"; fi; \
		echo "Extra Flags for part $*: $$EXTRA_FLAGS" && \
		PYTHONPATH=$(TRANSFORMERS_JS_PATH) .venv/bin/python3 -m scripts.quantize \
			--input_folder '$(STAGING_DIR)' \
			--output_folder '$(ONNX_DEST_DIR)' \
			--mode q4f16 $$EXTRA_FLAGS; \
	}
|
|
|
# Delete every *.onnx model under ONNX_DEST_DIR whose size, together with its
# external-data sidecar, is 2 GiB (2147483648 bytes) or more.
# The sidecar is `<model>.data` if present, otherwise `<model>_data` if
# present; only one of them is counted. Removal deletes the model and both
# possible sidecar names.
#
# Sizes are read with `wc -c`, which behaves the same on GNU and BSD systems.
# The previous `stat -f %z` is BSD/macOS syntax; with GNU stat `-f` means
# "file system status" and the command fails. Wrapping the result in $(( ))
# strips the leading blanks some `wc` implementations print.
# File names come from an unquoted `find` expansion, so they must not contain
# whitespace.
clean-large-files:
	@echo "🧹 Removing ONNX files over 2GB..."
	cd $(ONNX_DEST_DIR) && \
	for f in $$(find . -name "*.onnx" -type f); do \
		if [ -f "$${f}.data" ]; then \
			sidecar="$${f}.data"; \
		elif [ -f "$${f}_data" ]; then \
			sidecar="$${f}_data"; \
		else \
			sidecar=""; \
		fi; \
		total_size=$$(( $$(wc -c < "$$f") )); \
		if [ -n "$$sidecar" ]; then \
			total_size=$$(( total_size + $$(wc -c < "$$sidecar") )); \
		fi; \
		size_mb=$$(( total_size / 1048576 )); \
		if [ $$total_size -ge 2147483648 ]; then \
			echo " Removing $$f (size: $$size_mb MB)..."; \
			rm -f "$$f" "$${f}.data" "$${f}_data"; \
		fi; \
	done
	@echo "✅ Large file cleanup complete"
|
|
|
# Run ONNX_Tools/clamp_for_gpu_buffers.py --overwrite on every QwenVL_E_*.onnx
# file under ONNX_DEST_DIR, from inside the NATIVE_ANDROID repository.
#
# ONNX_DEST_DIR is a relative path that is used after the `cd`; it still
# resolves because NATIVE_ANDROID is addressed as `../<name>`, which from
# inside that directory points back at itself.
#
# A failed `cd` now aborts the recipe explicitly. Previously the recipe read
# `cd ... && files=...;`, and `-e` does not abort on a failure in the middle
# of an `&&` list, so a missing directory left `files` unset and the target
# reported success without processing anything.
fix-gpu-buffers:
	@echo "🔧 Fixing GPU buffers for E models..."
	cd $(NATIVE_ANDROID) || exit 1; \
	files=$$(find $(ONNX_DEST_DIR) -name "QwenVL_E_*.onnx" -type f); \
	$(call progress_bar,$$files, .venv/bin/python3 ONNX_Tools/clamp_for_gpu_buffers.py --overwrite {})
	@echo "✅ GPU buffer fixes complete"
|
|