File size: 3,866 Bytes
5c5a02d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# Every recipe line is executed by bash, started with -e (errexit).
SHELL := /bin/bash
.SHELLFLAGS := -e -c

# Sibling checkouts driven by this Makefile (relative to this directory)
NATIVE_ANDROID       = ../Native-LLM-for-Android
TRANSFORMERS_JS_PATH = ../transformers.js

# Locations inside the Native-LLM-for-Android checkout:
#   ONNX_SRC_DIR   models that `slim` and `quantize-%` read
#   ONNX_DEST_DIR  output folder handed to the quantizer
QWEN_VL_DIR     = $(NATIVE_ANDROID)/Export_ONNX/QwenVL
ONNX_SRC_DIR    = $(QWEN_VL_DIR)/onnx
ONNX_DEST_DIR   = $(QWEN_VL_DIR)/onnx-dist
ONNX_TOOLS_PATH = $(NATIVE_ANDROID)/ONNX_Tools

# Scratch folder that `quantize-%` fills with symlinks to the source models
STAGING_DIR = /tmp/transformers.js/staging

# Python interpreters living in each checkout's .venv
NATIVE_PYTHON       = $(NATIVE_ANDROID)/.venv/bin/python3
TRANSFORMERS_PYTHON = $(TRANSFORMERS_JS_PATH)/.venv/bin/python3

# Names of the model parts iterated over by `quantize`
PARTS = A B C D E
# progress_bar: run a command once per item while printing a 20-cell progress bar.
#   $(1)  whitespace-separated item list; it is expanded by the shell, so a
#         shell variable reference (e.g. $$files in the calling recipe) works
#   $(2)  command template; every "{}" in it is replaced by the current item
# Expands to one backslash-continued shell snippet, so it must be used inside a
# recipe line. Needs bash (arithmetic for-loops, ${var//pattern/replacement}).
# The item is handed to printf as an argument and substituted literally, so
# characters such as %, \, & or | in a path cannot corrupt the output or the
# command. The resulting command is still word-split before it is executed, so
# items containing whitespace are not supported.
define progress_bar
total=$$(echo $(1) | wc -w | tr -d ' '); \
	current=0; \
	for item in $(1); do \
		current=$$((current + 1)); \
		filled=$$((current * 20 / total)); \
		printf "\r Progress: \033[1;32m["; \
		for ((i = 0; i < filled; i++)); do printf "="; done; \
		printf "\033[0m"; \
		for ((i = filled; i < 20; i++)); do printf " "; done; \
		printf "\033[1;32m]\033[0m %s/%s " "$$current" "$$total"; \
		printf "\033[1;34m%s\033[K\033[0m\n" "$$item"; \
		cmd="$(2)"; \
		cmd=$${cmd//\{\}/"$$item"}; \
		$$cmd; \
	done; \
	printf "\n"
endef
# Command-style targets that never correspond to files on disk.
# `quantize-%` is deliberately not listed: .PHONY has no pattern support, and
# listing the concrete quantize-<part> names would skip the implicit-rule search
# that the `quantize-%` pattern rule relies on.
.PHONY: all all-in-one clean clean-large-files export export-abcd export-e fix-gpu-buffers quantize slim
# Full pipeline: export -> quantize -> clean-large-files -> slim -> fix-gpu-buffers.
# Each stage runs as its own sub-make (the same technique `quantize` uses for its
# parts) so the stages execute strictly in this order even under `make -j`;
# plain prerequisites may be built concurrently.
all-in-one:
	$(MAKE) export
	$(MAKE) quantize
	$(MAKE) clean-large-files
	$(MAKE) slim
	$(MAKE) fix-gpu-buffers
	@echo "✨ All done! ONNX models exported, slimmed, quantized and fixed"
# Export all model parts by running both export targets, then report completion.
export: export-abcd export-e
	@echo "✅ Export complete"
# Run QwenVL_Export_ABCD.py for Qwen/Qwen2-VL-2B-Instruct from inside QWEN_VL_DIR.
# The interpreter is NATIVE_PYTHON, made absolute with $(abspath) because the
# recipe changes directory first. abspath does not resolve symlinks, so the
# interpreter is still invoked through its .venv path.
export-abcd:
	@echo "🚀 Exporting parts A, B, C, D..."
	cd "$(QWEN_VL_DIR)" && \
	"$(abspath $(NATIVE_PYTHON))" QwenVL_Export_ABCD.py "Qwen/Qwen2-VL-2B-Instruct"
# Run QwenVL_Export_E.py for Qwen/Qwen2-VL-2B-Instruct from inside QWEN_VL_DIR.
# The interpreter is NATIVE_PYTHON, made absolute with $(abspath) because the
# recipe changes directory first. abspath does not resolve symlinks, so the
# interpreter is still invoked through its .venv path.
export-e:
	@echo "🚀 Exporting part E..."
	cd "$(QWEN_VL_DIR)" && \
	"$(abspath $(NATIVE_PYTHON))" QwenVL_Export_E.py "Qwen/Qwen2-VL-2B-Instruct"
# Run `onnxslim --verbose <file> <file>` on every *.onnx file under ONNX_SRC_DIR
# except QwenVL_E.onnx. The same path is passed twice (presumably input and
# output, i.e. the model is slimmed in place - confirm against onnxslim's CLI).
# NOTE(review): the emoji in the first message was mis-encoded in the reviewed
# copy; the clamp emoji below is a best-effort reconstruction.
slim:
	@echo "🗜️ Slimming ONNX models..."
	@files=$$(find $(ONNX_SRC_DIR) -name "*.onnx" -type f ! -name "QwenVL_E.onnx"); \
	$(call progress_bar,$$files,onnxslim --verbose {} {})
	@echo "✅ Slimming complete"
# Quantize every part listed in PARTS, one after another, by invoking the
# `quantize-<part>` target in a sub-make; the loop stops at the first failure.
quantize:
	@echo "⚡ Starting quantization..."
	for part in $(PARTS); do \
		$(MAKE) quantize-$$part || exit 1; \
	done
	@echo "✅ Quantization complete"
# Quantize one model part; the stem ($*) is the part name, e.g. `quantize-A`.
# Steps, all executed from inside TRANSFORMERS_JS_PATH:
#   1. empty STAGING_DIR and symlink everything from ONNX_SRC_DIR into it
#   2. drop staged entries named *_*_*.onnx / *_*_*.onnx_data, then every
#      *.onnx whose name does not start with QwenVL_<part>
#   3. run scripts.quantize in q4f16 mode from STAGING_DIR into ONNX_DEST_DIR;
#      part A additionally passes an --op_block_list
# Every step is joined with && so that any failure (e.g. the cd) aborts the
# recipe instead of falling through to the quantizer.
# The relative paths are re-resolved after the cd; with the default
# configuration this works because both checkouts sit next to each other.
quantize-%:
	@echo "⚡ Quantizing part $*..."
	mkdir -p $(ONNX_DEST_DIR)
	cd $(TRANSFORMERS_JS_PATH) && \
	mkdir -p $(STAGING_DIR) && \
	rm -f $(STAGING_DIR)/* && \
	ln -sf $$(realpath $(ONNX_SRC_DIR))/* $(STAGING_DIR)/ && \
	find $(STAGING_DIR) -name "*_*_*.onnx_data" -delete && \
	find $(STAGING_DIR) -name "*_*_*.onnx" -delete && \
	find $(STAGING_DIR) -name "*.onnx" ! -name "QwenVL_$**.onnx" -delete && \
	EXTRA_FLAGS="" && \
	if [ "$*" = "A" ]; then EXTRA_FLAGS="--op_block_list Conv DynamicQuantizeLinear DequantizeLinear Resize"; fi && \
	echo "Extra Flags for part $*: $$EXTRA_FLAGS" && \
	PYTHONPATH=$(TRANSFORMERS_JS_PATH) .venv/bin/python3 -m scripts.quantize \
		--input_folder '$(STAGING_DIR)' \
		--output_folder '$(ONNX_DEST_DIR)' \
		--mode q4f16 $$EXTRA_FLAGS
# Delete every *.onnx model under ONNX_DEST_DIR whose size, together with its
# external-data companion (<model>.data or <model>_data, whichever exists
# first), is at least 2147483648 bytes (2 GiB). The companions are removed too.
# Sizes are read with `wc -c`, which is specified by POSIX, instead of the
# BSD-only `stat -f %z`, so the target behaves the same on macOS and GNU/Linux.
clean-large-files:
	@echo "🧹 Removing ONNX files over 2GB..."
	cd $(ONNX_DEST_DIR) && \
	for f in $$(find . -name "*.onnx" -type f); do \
		total_size=$$(( $$(wc -c < "$$f") )); \
		if [ -f "$$f.data" ]; then \
			total_size=$$(( total_size + $$(wc -c < "$$f.data") )); \
		elif [ -f "$${f}_data" ]; then \
			total_size=$$(( total_size + $$(wc -c < "$${f}_data") )); \
		fi; \
		size_mb=$$(( total_size / 1048576 )); \
		if [ $$total_size -ge 2147483648 ]; then \
			echo " Removing $$f (size: $$size_mb MB)..."; \
			rm -f "$$f" "$$f.data" "$${f}_data"; \
		fi; \
	done
	@echo "✅ Large file cleanup complete"
# Run ONNX_Tools/clamp_for_gpu_buffers.py --overwrite on every QwenVL_E_*.onnx
# file found under ONNX_DEST_DIR, from inside the NATIVE_ANDROID checkout.
# The work is wrapped in { ...; } after the && so that a failed cd aborts the
# recipe; otherwise the loop would run over an empty list and report success.
# ONNX_DEST_DIR is re-resolved relative to NATIVE_ANDROID after the cd; with the
# default configuration both spellings name the same directory.
fix-gpu-buffers:
	@echo "🔧 Fixing GPU buffers for E models..."
	cd $(NATIVE_ANDROID) && { \
		files=$$(find $(ONNX_DEST_DIR) -name "QwenVL_E_*.onnx" -type f); \
		$(call progress_bar,$$files,.venv/bin/python3 ONNX_Tools/clamp_for_gpu_buffers.py --overwrite {}); \
	}
	@echo "✅ GPU buffer fixes complete"
|