File size: 3,866 Bytes
5c5a02d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# Recipes are executed by bash with errexit (-e); several recipes below use
# bash-only syntax such as `for ((...))`.
SHELL := /bin/bash
.SHELLFLAGS := -e -c

# --- Checkout locations (relative to the directory make is run from) ---
NATIVE_ANDROID := ../Native-LLM-for-Android
TRANSFORMERS_JS_PATH := ../transformers.js

# --- Directories derived from the Native-LLM-for-Android checkout ---
QWEN_VL_DIR := $(NATIVE_ANDROID)/Export_ONNX/QwenVL
ONNX_SRC_DIR := $(QWEN_VL_DIR)/onnx
ONNX_DEST_DIR := $(QWEN_VL_DIR)/onnx-dist
ONNX_TOOLS_PATH := $(NATIVE_ANDROID)/ONNX_Tools

# --- Scratch directory used while quantizing a single part ---
STAGING_DIR := /tmp/transformers.js/staging

# --- Python interpreters inside each checkout's virtualenv ---
NATIVE_PYTHON := $(NATIVE_ANDROID)/.venv/bin/python3
TRANSFORMERS_PYTHON := $(TRANSFORMERS_JS_PATH)/.venv/bin/python3

# --- Model parts iterated over by the `quantize` target ---
PARTS := A B C D E

# progress_bar: run a command once per item while drawing a 20-cell bar.
#   $(1)  whitespace-separated item list; callers pass a shell expansion
#         (e.g. the value of a shell variable holding `find` output).
#   $(2)  command template; every literal {} is replaced by the current item.
# The body is one backslash-continued shell command, so it must be the tail
# of a recipe line. The templated command is run through unquoted word
# splitting, so items containing whitespace are not supported.
# Item text and counters are passed to printf as %s arguments rather than
# being embedded in the format string, so a '%' or backslash in a path
# cannot corrupt the output.
define progress_bar
total=$$(echo $(1) | wc -w | tr -d ' '); \
current=0; \
for item in $(1); do \
    current=$$((current + 1)); \
    filled=$$((current * 20 / total)); \
    printf "\r   Progress: \033[1;32m["; \
    for ((i=0; i<filled; i++)); do printf "="; done; \
    printf "\033[0m"; \
    for ((i=filled; i<20; i++)); do printf " "; done; \
    printf "\033[1;32m]\033[0m %s/%s " "$$current" "$$total"; \
    printf "\033[1;34m%s\033[K\033[0m\n" "$$item"; \
    cmd="$(2)"; \
    cmd=$$(echo "$$cmd" | sed "s|{}|$$item|g"); \
    $$cmd; \
done; \
printf "\n"
endef

# Command-style targets that never correspond to a file on disk.
# export-abcd and export-e are listed so that a stray file with either name
# cannot make the export steps look up to date.
# `quantize-%` is only a literal name here (.PHONY does not take patterns).
# Do not list quantize-A ... quantize-E explicitly: make skips implicit-rule
# search for phony targets, so the quantize-% pattern rule would stop applying.
# `all` and `clean` are declared although no rule for them appears in this file.
.PHONY: all all-in-one clean clean-large-files export export-abcd export-e fix-gpu-buffers quantize quantize-% slim

# Full pipeline: export -> quantize -> clean-large-files -> slim -> fix-gpu-buffers.
# The stages work on the same directories and must run strictly in this order.
# Listing them as prerequisites only gives that order in a serial build; under
# `make -j` they could start concurrently. Each stage is therefore run as its
# own sub-make, one after another.
all-in-one:
	$(MAKE) export
	$(MAKE) quantize
	$(MAKE) clean-large-files
	$(MAKE) slim
	$(MAKE) fix-gpu-buffers
	@echo "✨ All done! ONNX models exported, slimmed, quantized and fixed"

# Run both export steps (parts A-D, then part E) and report completion.
# The status message's emoji was mis-encoded (UTF-8 bytes shown as
# single-byte characters); it is restored to the intended check mark.
export: export-abcd export-e
	@echo "✅ Export complete"

# Run QwenVL_Export_ABCD.py for Qwen/Qwen2-VL-2B-Instruct from inside the
# QwenVL export directory. The directory comes from $(QWEN_VL_DIR) instead of
# a hard-coded copy of the same path, so overriding NATIVE_ANDROID or
# QWEN_VL_DIR is honoured. The interpreter path is relative to that directory
# (two levels up, i.e. the .venv at the root of the checkout with the default
# layout).
export-abcd:
	@echo "🚀 Exporting parts A, B, C, D..."
	cd $(QWEN_VL_DIR) && \
	../../.venv/bin/python3 QwenVL_Export_ABCD.py "Qwen/Qwen2-VL-2B-Instruct"

# Run QwenVL_Export_E.py for Qwen/Qwen2-VL-2B-Instruct from inside the QwenVL
# export directory. The directory comes from $(QWEN_VL_DIR) instead of a
# hard-coded copy of the same path, so overriding NATIVE_ANDROID or
# QWEN_VL_DIR is honoured. The interpreter path is relative to that directory
# (two levels up, i.e. the .venv at the root of the checkout with the default
# layout).
export-e:
	@echo "🚀 Exporting part E..."
	cd $(QWEN_VL_DIR) && \
	../../.venv/bin/python3 QwenVL_Export_E.py "Qwen/Qwen2-VL-2B-Instruct"

# Run `onnxslim --verbose <file> <file>` on every *.onnx file found under
# $(ONNX_SRC_DIR), except QwenVL_E.onnx. The same path is passed as both
# arguments (presumably input and output, i.e. an in-place rewrite; confirm
# against the onnxslim CLI).
# The status messages' emoji were mis-encoded and are restored here.
slim:
	@echo "🗜️  Slimming ONNX models..."
	@files=$$(find $(ONNX_SRC_DIR) -name "*.onnx" -type f ! -name "QwenVL_E.onnx"); \
	$(call progress_bar,$$files,onnxslim --verbose {} {})
	@echo "✅ Slimming complete"

# Quantize every part listed in $(PARTS), one sub-make per part, stopping at
# the first failure. The parts are deliberately run one after another: each
# quantize-<part> run empties and refills the single shared $(STAGING_DIR).
# The completion message's emoji was mis-encoded and is restored here.
quantize:
	@echo "⚑ Starting quantization..."
	for part in $(PARTS); do \
		$(MAKE) quantize-$$part || exit 1; \
	done
	@echo "✅ Quantization complete"

# quantize-<part>: quantize one model part ($* is the part name, e.g. A).
#   1. Ensure $(ONNX_DEST_DIR) exists.
#   2. From inside $(TRANSFORMERS_JS_PATH), empty $(STAGING_DIR) and refill it
#      with symlinks to everything in $(ONNX_SRC_DIR).
#   3. Drop staged entries named *_*_*.onnx / *_*_*.onnx_data, then every
#      staged *.onnx that does not match QwenVL_<part>*.onnx. In the recipe,
#      "$**" is the stem $* followed by a literal glob '*'.
#   4. Run scripts.quantize in q4f16 mode from staging into $(ONNX_DEST_DIR).
#      Part A additionally passes an --op_block_list.
# The staging chain ends in `|| exit 1`. With a bare `;` after an && list,
# `bash -e` ignores a failure of any command that is not the last one in the
# list, so a failed cd/mkdir/ln/find would still have run the quantizer.
# EXTRA_FLAGS is expanded unquoted on purpose so it splits into arguments.
# NOTE(review): the relative $(ONNX_SRC_DIR)/$(ONNX_DEST_DIR) paths are
# resolved after the cd; this presumably relies on both checkouts being
# siblings of this directory. Confirm before changing the layout.
quantize-%:
	@echo "⚑ Quantizing part $*..."
	mkdir -p $(ONNX_DEST_DIR)
	cd $(TRANSFORMERS_JS_PATH) && \
	mkdir -p $(STAGING_DIR) && \
	rm -f $(STAGING_DIR)/* && \
	ln -sf $$(realpath $(ONNX_SRC_DIR))/* $(STAGING_DIR)/ && \
	find $(STAGING_DIR) -name "*_*_*.onnx_data" -delete && \
	find $(STAGING_DIR) -name "*_*_*.onnx" -delete && \
	find $(STAGING_DIR) -name "*.onnx" ! -name "QwenVL_$**.onnx" -delete || exit 1; \
	EXTRA_FLAGS=""; \
	if [ "$*" = "A" ]; then EXTRA_FLAGS="--op_block_list Conv DynamicQuantizeLinear DequantizeLinear Resize"; fi; \
	echo "Extra Flags for part $*: $$EXTRA_FLAGS" && \
	PYTHONPATH=$(TRANSFORMERS_JS_PATH) .venv/bin/python3 -m scripts.quantize \
		--input_folder '$(STAGING_DIR)' \
		--output_folder '$(ONNX_DEST_DIR)' \
		--mode q4f16 $$EXTRA_FLAGS

# Delete models in $(ONNX_DEST_DIR) whose size reaches 2 GiB (2147483648 bytes).
# A model's size is its .onnx file plus one external-data sidecar if present:
# "<model>.onnx.data" is used when it exists, otherwise "<model>.onnx_data".
# When a model is removed, both possible sidecar names are removed with it.
# Sizes are read with `wc -c` (POSIX) rather than `stat -f %z`, which only
# works with BSD/macOS stat and fails with GNU stat. The $$(( ... )) wrapper
# strips the leading padding some wc implementations emit.
# File names are taken from unquoted `find` output, so names containing
# whitespace are not supported.
# The completion message's emoji was mis-encoded and is restored here.
clean-large-files:
	@echo "🧹 Removing ONNX files over 2GB..."
	cd $(ONNX_DEST_DIR) && \
	for f in $$(find . -name "*.onnx" -type f); do \
		sidecar=""; \
		if [ -f "$$f.data" ]; then \
			sidecar="$$f.data"; \
		elif [ -f "$${f}_data" ]; then \
			sidecar="$${f}_data"; \
		fi; \
		total_size=$$(( $$(wc -c < "$$f") )); \
		if [ -n "$$sidecar" ]; then \
			total_size=$$(( total_size + $$(wc -c < "$$sidecar") )); \
		fi; \
		size_mb=$$(( total_size / 1048576 )); \
		if [ $$total_size -ge 2147483648 ]; then \
			echo "   Removing $$f (size: $$size_mb MB)..."; \
			rm -f "$$f" "$$f"".data" "$$f""_data"; \
		fi; \
	done
	@echo "✅ Large file cleanup complete"

# Run ONNX_Tools/clamp_for_gpu_buffers.py --overwrite on every
# QwenVL_E_*.onnx file found under $(ONNX_DEST_DIR), using the .venv
# interpreter of the $(NATIVE_ANDROID) checkout.
# The cd and the find are each followed by `|| exit 1`. In the previous form
# (`cd ... && files=...;`) `bash -e` ignored a failed cd, and the recipe
# continued with an empty file list and reported success.
# NOTE(review): $(ONNX_DEST_DIR) is a relative path evaluated after the cd;
# with the default values it resolves to the same directory. Confirm if the
# paths are overridden.
# The status messages' emoji were mis-encoded and are restored here.
fix-gpu-buffers:
	@echo "🔧 Fixing GPU buffers for E models..."
	cd $(NATIVE_ANDROID) || exit 1; \
	files=$$(find $(ONNX_DEST_DIR) -name "QwenVL_E_*.onnx" -type f) || exit 1; \
	$(call progress_bar,$$files, .venv/bin/python3 ONNX_Tools/clamp_for_gpu_buffers.py --overwrite {})
	@echo "✅ GPU buffer fixes complete"