#!/bin/bash
set -ex
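
# Illustrative usage (values here are examples, not defaults; flags match the
# option parsing below):
#   ./compile.sh --name qwen1.5-1.8b --mode int4 --guess_len 3
#   ./compile.sh --name qwen1.5-7b --mode int8 --num_device 2 --addr_mode io_alone --guess_len 3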

models=
mode="f16"
folder="tmp"
num_device=1
mode_args=""
device_args=""
quantize_args="--quantize F16"
addr_args=""
name=""
num_layers=
guess_len=
out_model=$name.bmodel
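
# Parse command-line options.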
while [[ $# -gt 0 ]]; do
    key="$1"

    case $key in
    --mode)
        mode="$2"
        shift 2
        ;;
    --num_device)
        num_device="$2"
        shift 2
        ;;
    --name)
        name="$2"
        shift 2
        ;;
    --addr_mode)
        addr_mode="$2"
        shift 2
        ;;
    --guess_len)
        guess_len="$2"
        shift 2
        ;;
    *)
        echo "Invalid option: $key" >&2
        exit 1
        ;;
    esac
done

if [[ -z "$guess_len" ]]; then
    echo "Error: --guess_len is required." >&2
    exit 1
fi
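
# Map the model name to the index of its last transformer block; the block loop
# below compiles layers 0..num_layers inclusive.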
if [ "$name" = "qwen1.5-7b" ]; then
    num_layers=31
    echo "Compile Qwen1.5-7B"
elif [ "$name" = "qwen1.5-4b" ]; then
    num_layers=39
    echo "Compile Qwen1.5-4B"
elif [ "$name" = "qwen1.5-1.8b" ]; then
    num_layers=23
    echo "Compile Qwen1.5-1.8B"
elif [ "$name" = "qwen1.5-0.5b" ]; then
    num_layers=23
    echo "Compile Qwen1.5-0.5B"
else
    >&2 echo -e "Error: Invalid name $name, the input name must be \033[31mqwen1.5-0.5b|qwen1.5-1.8b|qwen1.5-4b|qwen1.5-7b\033[0m"
    exit 1
fi
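
# Translate --mode into the quantization flags passed to model_deploy.py.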
if [ x$mode == x"int8" ]; then
    quantize_args="--quantize W8F16"
elif [ x$mode == x"f16" ]; then
    quantize_args="--quantize F16"
elif [ x$mode == x"int4" ]; then
    quantize_args="--quantize W4F16 --q_group_size 64"
else
    echo "Error: unknown quantize mode: $mode" >&2
    exit 1
fi
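
# Multi-device builds pass --num_device through to model_deploy.py and get a
# distinct output name; --addr_mode io_alone is forwarded only to the cache blocks.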
if [ x$num_device != x1 ]; then
    device_args="--num_device $num_device"
    out_model=$name'_'$mode'_'$num_device'dev.bmodel'
else
    out_model=$name'_'$mode'_1dev.bmodel'
fi

if [ x$addr_mode == x"io_alone" ]; then
    addr_args="--addr_mode io_alone"
fi
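
# Embedding is compiled in two variants: "embedding" with the shape baked into the
# ONNX export, and "embedding_cache" reshaped to [1, guess_len]. Both stay in F16
# regardless of --mode.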
outdir=${folder}/embedding
mkdir -p $outdir
pushd $outdir

model_transform.py \
    --model_name embedding \
    --model_def ../onnx/embedding.onnx \
    --mlir embedding.mlir

model_deploy.py \
    --mlir embedding.mlir \
    --quantize F16 \
    --quant_input \
    --quant_output \
    --chip bm1684x \
    $device_args \
    --model embedding.bmodel

model_transform.py \
    --model_name embedding_cache \
    --model_def ../onnx/embedding.onnx \
    --input_shapes [[1,$guess_len]] \
    --mlir embedding_cache.mlir

model_deploy.py \
    --mlir embedding_cache.mlir \
    --quantize F16 \
    --quant_input \
    --quant_output \
    --chip bm1684x \
    $device_args \
    --model embedding_cache.bmodel

rm *.npz

models=$models' '$outdir'/embedding.bmodel '$outdir'/embedding_cache.bmodel '

popd

echo $models
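
# lm_head is compiled with the selected quantization; the greedy and penalty
# sampling heads are deployed without explicit quantization flags.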
outdir=${folder}/$mode"_"$num_device"dev"/lm_head
mkdir -p $outdir
pushd $outdir

model_transform.py \
    --model_name lm_head \
    --model_def ../../onnx/lm_head.onnx \
    --mlir lm_head.mlir

model_deploy.py \
    --mlir lm_head.mlir \
    $quantize_args \
    --quant_input \
    --chip bm1684x \
    $device_args \
    --model lm_head.bmodel

model_transform.py \
    --model_name greedy_head \
    --model_def ../../onnx/greedy_head.onnx \
    --mlir greedy_head.mlir

model_deploy.py \
    --mlir greedy_head.mlir \
    --chip bm1684x \
    --model greedy_head.bmodel

model_transform.py \
    --model_name penalty_sample_head \
    --model_def ../../onnx/penalty_sample_head.onnx \
    --mlir penalty_sample_head.mlir

model_deploy.py \
    --mlir penalty_sample_head.mlir \
    --chip bm1684x \
    --model penalty_sample_head.bmodel

rm *.npz

models=${models}${outdir}'/lm_head.bmodel '$outdir'/greedy_head.bmodel '$outdir'/penalty_sample_head.bmodel '
popd

echo $models
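
# Compile every transformer layer twice from its exported ONNX: block_$i and the
# cached variant block_cache_$i; only the cache variant receives $addr_args.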
outdir=${folder}/$mode"_"$num_device"dev"/block
mkdir -p $outdir
pushd $outdir

for ((i=0; i<=$num_layers; i++)); do

    model_transform.py \
        --model_name block_$i \
        --model_def ../../onnx/block_$i.onnx \
        --mlir block_$i.mlir

    model_deploy.py \
        --mlir block_$i.mlir \
        $quantize_args \
        --quant_input \
        --quant_output \
        --chip bm1684x \
        $device_args \
        --model block_$i.bmodel

    model_transform.py \
        --model_name block_cache_$i \
        --model_def ../../onnx/block_cache_${i}.onnx \
        --mlir block_cache_$i.mlir

    model_deploy.py \
        --mlir block_cache_$i.mlir \
        $quantize_args \
        --quant_input \
        --quant_output \
        --chip bm1684x \
        $device_args \
        $addr_args \
        --model block_cache_$i.bmodel

    rm *.npz

    models=${models}${outdir}'/block_'$i'.bmodel '$outdir'/block_cache_'$i'.bmodel '

done
popd
echo $models
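
# Merge all compiled stage bmodels into the single output bmodel.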
model_tool --combine $models -o $out_model