File size: 5,601 Bytes
5c5a02d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00f8773
 
 
 
 
 
 
5c5a02d
 
00f8773
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c5a02d
 
 
 
 
 
 
 
 
 
00f8773
5c5a02d
 
 
 
00f8773
5c5a02d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
SHELL := /bin/bash

# Every target listed here is a command, not a file this Makefile produces.
# Declaring them phony keeps a stray file or directory with the same name
# (e.g. "export" or "slim") from making the target look up to date.
# The quantize-<part> targets are deliberately NOT listed: phony targets skip
# implicit-rule search, so they would no longer match the quantize-% rule.
.PHONY: export-merged-source-models export-merged-source-models-first-pass \
        export-merged-source-models-second-pass all-in-one export export-abcd \
        export-e slim quantize clean-large-files fix-gpu-buffers

# Configuration variables.
# Simply-expanded (:=) because each value only refers to names defined above
# it. All relative paths are relative to the directory make is run from.
NATIVE_ANDROID := ../Native-LLM-for-Android
QWEN_VL_DIR := $(NATIVE_ANDROID)/Export_ONNX/QwenVL
ONNX_SRC_DIR := $(QWEN_VL_DIR)/onnx
ONNX_DEST_DIR := $(QWEN_VL_DIR)/onnx-dist
STAGING_DIR := /tmp/transformers.js/staging
TRANSFORMERS_JS_PATH := ../transformers.js
ONNX_TOOLS_PATH := $(NATIVE_ANDROID)/ONNX_Tools

# Python interpreters taken from each project's virtualenv
NATIVE_PYTHON := $(NATIVE_ANDROID)/.venv/bin/python3
TRANSFORMERS_PYTHON := $(TRANSFORMERS_JS_PATH)/.venv/bin/python3

# Model parts iterated over by the quantize target
PARTS := A B C D E

# progress_bar(current, total, label)
# Shell snippet to splice into a bash recipe with $(call progress_bar,...).
#   $1  number of items processed so far (used in shell arithmetic)
#   $2  total number of items (used as the divisor, so it must be non-zero)
#   $3  text printed after "Processing:"
# Draws a 20-column bar on the current terminal line: "=" for the completed
# share, spaces for the rest, followed by "$1/$2" and the label. The line
# starts and ends with a carriage return so the next call overwrites it.
# Sets the shell variables _filled, _empty and _col in the calling shell.
define progress_bar
	printf "\r   Progress: \033[1;32m["; \
	_filled=$$(($1 * 20 / $2)); \
	_col=0; while ((_col < _filled)); do printf "="; _col=$$((_col + 1)); done; \
	printf "\033[0m"; \
	_empty=$$((20 - _filled)); \
	_col=0; while ((_col < _empty)); do printf " "; _col=$$((_col + 1)); done; \
	printf "\033[1;32m]\033[0m $1/$2  Processing: \033[1;34m%s\033[K\033[0m\r" "$3"
endef

# Background: https://github.com/pytorch/pytorch/issues/94280#issuecomment-2089196400
# The original export scripts emit many separate tensor files per model; the
# two passes below re-save each model so its tensors end up in one or two
# files instead.
export-merged-source-models: \
        export-merged-source-models-first-pass \
        export-merged-source-models-second-pass
	@echo "βœ… Exporting merged source models complete"

# First pass: load every *.onnx found under $(ONNX_SRC_DIR) and re-save it
# into $(ONNX_DEST_DIR) under the same base name, with all tensors written to
# a single external-data file named "<model>.onnx.data" next to it.
#
# - The source path and destination directory are handed to Python through
#   sys.argv instead of being pasted into the Python source, so a quote
#   character in a path cannot break the one-liner.
# - An existing destination .data file is removed before saving: onnx appends
#   to an existing external-data file, so re-running this target would
#   otherwise grow the file with stale tensor data on every run.
# - The file list is split on whitespace, so model paths containing spaces are
#   not supported.
# Aborts on the first model that fails to convert.
export-merged-source-models-first-pass:
	@echo "πŸ’Ύ First pass: Export all models with merged tensors..."
	@mkdir -p "$(ONNX_DEST_DIR)"
	@files=$$(find "$(ONNX_SRC_DIR)" -name "*.onnx"); \
	total=$$(echo "$$files" | wc -w | tr -d ' '); \
	echo "Files found (first pass): $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		$(NATIVE_PYTHON) -u -c 'import onnx, os, sys; \
			src, dest_dir = sys.argv[1], sys.argv[2]; \
			dest = os.path.join(dest_dir, os.path.basename(src)); \
			data_name = os.path.basename(dest) + ".data"; \
			data_path = os.path.join(dest_dir, data_name); \
			model = onnx.load(src); \
			os.path.exists(data_path) and os.remove(data_path); \
			onnx.save_model(model, dest, all_tensors_to_one_file=True, save_as_external_data=True, location=data_name)' \
			"$$item" "$(ONNX_DEST_DIR)" || exit 1; \
	done; \
	echo "βœ… Done first pass"

# Second pass: re-save every *.onnx under $(ONNX_DEST_DIR) in place. A model
# whose .onnx file plus its "<model>.onnx.data" sidecar exceeds 2e9 bytes keeps
# its tensors in a single external-data file; smaller models are saved as one
# self-contained .onnx file with no sidecar.
#
# The model is loaded fully into memory first and the old sidecar is removed
# BEFORE saving. onnx appends to an existing external-data file, so saving on
# top of the old sidecar would leave the previous tensor bytes in place and
# roughly double the file on every run. Removing it up front also covers the
# "no longer needs external data" case, so no cleanup is needed after saving.
# The path is passed through sys.argv rather than spliced into the source.
# The file list is split on whitespace, so paths containing spaces are not
# supported. Aborts on the first model that fails.
export-merged-source-models-second-pass:
	@echo "πŸ’Ύ Second pass: Converting large models to external data format..."
	@files=$$(find "$(ONNX_DEST_DIR)" -name "*.onnx"); \
	total=$$(echo "$$files" | wc -w | tr -d ' '); \
	echo "Files found (second pass): $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		$(NATIVE_PYTHON) -c 'import onnx, os, sys; \
			src = sys.argv[1]; \
			data = src + ".data"; \
			total_size = os.path.getsize(src) + (os.path.getsize(data) if os.path.exists(data) else 0); \
			needs_external = total_size > 2e9; \
			model = onnx.load(src); \
			os.path.exists(data) and os.remove(data); \
			onnx.save_model( \
				model, \
				src, \
				save_as_external_data=needs_external, \
				all_tensors_to_one_file=True, \
				location=(os.path.basename(src) + ".data") if needs_external else None \
			)' "$$item" || exit 1; \
	done; \
	echo "βœ… Done second models"


# Complete pipeline. The prerequisites are listed in the order they are meant
# to run: export, quantize, clean-large-files, slim, then fix-gpu-buffers.
all-in-one: \
        export \
        quantize \
        clean-large-files \
        slim \
        fix-gpu-buffers
	@echo "✨ All done! ONNX models exported, slimmed, quantized and fixed"

# Umbrella target: runs both export steps (parts A-D, then part E) and
# prints a confirmation once they have finished.
export: \
        export-abcd \
        export-e
	@echo "βœ… Export complete"

# Run QwenVL_Export_ABCD.py for "Qwen/Qwen2-VL-2B-Instruct" from inside
# $(QWEN_VL_DIR).
#
# $(NATIVE_PYTHON) is a path relative to the directory make was started in, so
# it is converted to an absolute path before the cd; otherwise it would be
# looked up relative to $(QWEN_VL_DIR) and not be found. abspath (not
# realpath) is used on purpose so the virtualenv's python3 symlink is invoked
# as-is rather than the interpreter it points to.
# NOTE(review): assumes NATIVE_PYTHON is a path, not a bare command name.
export-abcd:
	@echo "πŸš€ Exporting parts A, B, C, D..."
	cd "$(QWEN_VL_DIR)" && \
	"$(abspath $(NATIVE_PYTHON))" QwenVL_Export_ABCD.py "Qwen/Qwen2-VL-2B-Instruct"

# Run QwenVL_Export_E.py for "Qwen/Qwen2-VL-2B-Instruct" from inside
# $(QWEN_VL_DIR).
#
# $(NATIVE_PYTHON) is a path relative to the directory make was started in, so
# it is converted to an absolute path before the cd; otherwise it would be
# looked up relative to $(QWEN_VL_DIR) and not be found. abspath (not
# realpath) is used on purpose so the virtualenv's python3 symlink is invoked
# as-is rather than the interpreter it points to.
# NOTE(review): assumes NATIVE_PYTHON is a path, not a bare command name.
export-e:
	@echo "πŸš€ Exporting part E..."
	cd "$(QWEN_VL_DIR)" && \
	"$(abspath $(NATIVE_PYTHON))" QwenVL_Export_E.py "Qwen/Qwen2-VL-2B-Instruct"

# Run "onnxslim --verbose <file> <file>" (input and output are the same path,
# i.e. in place) on every *.onnx file under $(ONNX_SRC_DIR) except
# QwenVL_E.onnx, showing a progress bar.
#
# progress_bar only draws the bar; it takes (current, total, label) and does
# not execute commands, so the loop and the onnxslim call are spelled out
# here. Aborts on the first file onnxslim fails on. The file list is split on
# whitespace, so paths containing spaces are not supported.
slim:
	@echo "πŸ—œοΈ  Slimming ONNX models..."
	@files=$$(find "$(ONNX_SRC_DIR)" -name "*.onnx" -type f ! -name "QwenVL_E.onnx"); \
	total=$$(echo "$$files" | wc -w | tr -d ' '); \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		onnxslim --verbose "$$item" "$$item" || exit 1; \
	done
	@echo "βœ… Slimming complete"

# Quantize every part named in $(PARTS), one after another, by re-invoking
# make on the matching quantize-<part> target. Stops with exit status 1 as
# soon as one part fails.
quantize:
	@echo "⚑ Starting quantization..."
	for p in $(PARTS); do \
		if ! $(MAKE) quantize-$$p; then exit 1; fi; \
	done
	@echo "βœ… Quantization complete"

# quantize-<part>: quantize the models of one part (the stem $*) to q4f16
# using transformers.js's scripts.quantize module.
#
# Steps, all inside one shell running under `set -e` so any failing step stops
# the recipe before the quantizer is launched:
#   1. cd into $(TRANSFORMERS_JS_PATH) (scripts.quantize is run from there).
#   2. Recreate $(STAGING_DIR) as a set of symlinks to everything in
#      $(ONNX_SRC_DIR).
#   3. Drop staged files matching "*_*_*.onnx_data" / "*_*_*.onnx", then every
#      staged *.onnx that is not "QwenVL_<part>*.onnx".
#   4. Run the quantizer from $(STAGING_DIR) into $(ONNX_DEST_DIR). Part A
#      additionally passes an --op_block_list.
#
# Paths that are relative to the directory make was started in are made
# absolute with $(abspath ...) because they are used after the cd. abspath is
# used rather than $(realpath ...) because realpath expands to nothing for a
# missing directory, which would turn the ln source into "/*".
# $$EXTRA_FLAGS is intentionally unquoted so it splits into separate arguments.
quantize-%:
	$(if $(strip $(STAGING_DIR)),,$(error STAGING_DIR is empty; refusing to clear it))
	@echo "⚑ Quantizing part $*..."
	mkdir -p "$(ONNX_DEST_DIR)"
	set -e; \
	cd "$(TRANSFORMERS_JS_PATH)"; \
	mkdir -p "$(STAGING_DIR)"; \
	rm -f "$(STAGING_DIR)"/*; \
	ln -sf "$(abspath $(ONNX_SRC_DIR))"/* "$(STAGING_DIR)"/; \
	find "$(STAGING_DIR)" -name "*_*_*.onnx_data" -delete; \
	find "$(STAGING_DIR)" -name "*_*_*.onnx" -delete; \
	find "$(STAGING_DIR)" -name "*.onnx" ! -name "QwenVL_$**.onnx" -delete; \
	EXTRA_FLAGS=""; \
	if [ "$*" = "A" ]; then EXTRA_FLAGS="--op_block_list Conv DynamicQuantizeLinear DequantizeLinear Resize"; fi; \
	echo "Extra Flags for part $*: $$EXTRA_FLAGS"; \
	PYTHONPATH="$(abspath $(TRANSFORMERS_JS_PATH))" "$(abspath $(TRANSFORMERS_PYTHON))" -m scripts.quantize \
		--input_folder '$(STAGING_DIR)' \
		--output_folder '$(abspath $(ONNX_DEST_DIR))' \
		--mode q4f16 $$EXTRA_FLAGS

# Delete every *.onnx model under $(ONNX_DEST_DIR) whose size, together with
# its external-data sidecar, is at least 2 GiB (2147483648 bytes).
#
# The sidecar is "<model>.onnx.data" if that file exists, otherwise
# "<model>.onnx_data" if that exists; only one of them is counted. When a
# model is removed, both possible sidecar names are removed along with it.
#
# Sizes are read with `wc -c` instead of `stat -f %z`: the latter is the
# BSD/macOS spelling, and GNU stat treats -f as --file-system, so it fails on
# Linux. `tr -d ' '` strips the padding some wc implementations print.
# The file list is split on whitespace, so paths containing spaces are not
# supported.
clean-large-files:
	@echo "🧹 Removing ONNX files over 2GB..."
	cd "$(ONNX_DEST_DIR)" || exit 1; \
	size_of() { wc -c < "$$1" | tr -d ' '; }; \
	for f in $$(find . -name "*.onnx" -type f); do \
		sidecar=""; \
		if [ -f "$${f}.data" ]; then \
			sidecar="$${f}.data"; \
		elif [ -f "$${f}_data" ]; then \
			sidecar="$${f}_data"; \
		fi; \
		total_size=$$(size_of "$$f"); \
		if [ -n "$$sidecar" ]; then \
			total_size=$$(( total_size + $$(size_of "$$sidecar") )); \
		fi; \
		size_mb=$$(( total_size / 1048576 )); \
		if [ "$$total_size" -ge 2147483648 ]; then \
			echo "   Removing $$f (size: $$size_mb MB)..."; \
			rm -f "$$f" "$${f}.data" "$${f}_data"; \
		fi; \
	done
	@echo "βœ… Large file cleanup complete"

# Run ONNX_Tools/clamp_for_gpu_buffers.py --overwrite on every
# "QwenVL_E_*.onnx" file under $(ONNX_DEST_DIR), from inside
# $(NATIVE_ANDROID) and with that project's .venv interpreter.
#
# progress_bar only draws the bar; it takes (current, total, label) and does
# not execute commands, so the loop and the tool invocation are spelled out
# here. The model paths are collected as absolute paths before the cd so they
# stay valid afterwards. Aborts on the first file the tool fails on. The file
# list is split on whitespace, so paths containing spaces are not supported.
fix-gpu-buffers:
	@echo "πŸ”§ Fixing GPU buffers for E models..."
	@files=$$(find "$(abspath $(ONNX_DEST_DIR))" -name "QwenVL_E_*.onnx" -type f); \
	total=$$(echo "$$files" | wc -w | tr -d ' '); \
	current=0; \
	cd "$(NATIVE_ANDROID)" || exit 1; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		.venv/bin/python3 ONNX_Tools/clamp_for_gpu_buffers.py --overwrite "$$item" || exit 1; \
	done
	@echo "βœ… GPU buffer fixes complete"