JoshuaChak's picture
Upload folder using huggingface_hub
7c071a8 verified
#!/bin/bash
set -ex
models=
mode="f16"
folder="tmp"
num_device=1
mode_args=""
device_args=""
quantize_args="--quantize F16"
addr_args=""
name=""
num_layers=
guess_len=
out_model=$name.bmodel
while [[ $# -gt 0 ]]; do
key="$1"
case $key in
--mode)
mode="$2"
shift 2
;;
--num_device)
num_device="$2"
shift 2
;;
--name)
name="$2"
shift 2
;;
--addr_mode)
addr_mode="$2"
shift 2
;;
--guess_len)
guess_len="$2"
shift 2
;;
*)
echo "Invalid option: $key" >&2
exit 1
;;
:)
echo "Option -$OPTARG requires an argument." >&2
exit 1
;;
esac
done
# --guess_len has no default. It is later used as the second dimension of the
# embedding_cache input shape, so it must be a positive integer.
if [[ -z "$guess_len" ]]; then
  echo "Error: --guess_len is required." >&2
  exit 1
fi
if ! [[ "$guess_len" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: --guess_len must be a positive integer, got '$guess_len'." >&2
  exit 1
fi

# Map the model name to num_layers. The block compile loop runs from 0 to
# num_layers inclusive, so this is the index of the last block.
case "$name" in
  qwen1.5-7b)
    num_layers=31
    echo "Compile Qwen1.5-7B"
    ;;
  qwen1.5-4b)
    num_layers=39
    echo "Compile Qwen1.5-4B"
    ;;
  qwen1.5-1.8b)
    num_layers=23
    echo "Compile Qwen1.5-1.8B"
    ;;
  qwen1.5-0.5b)
    num_layers=23
    echo "Compile Qwen1.5-0.5B"
    ;;
  *)
    # List every accepted name, not just a subset.
    echo -e "Error: Invalid name $name, the input name must be \033[31mqwen1.5-0.5b|qwen1.5-1.8b|qwen1.5-4b|qwen1.5-7b\033[0m" >&2
    exit 1
    ;;
esac

# Translate the quantization mode into model_deploy.py arguments.
case "$mode" in
  int8)
    quantize_args="--quantize W8F16"
    ;;
  f16)
    quantize_args="--quantize F16"
    ;;
  int4)
    quantize_args="--quantize W4F16 --q_group_size 64"
    ;;
  *)
    echo "Error, unknown quantize mode" >&2
    exit 1
    ;;
esac

# --num_device is only passed on for multi-device builds; the output file
# name always carries the device count (e.g. "..._1dev.bmodel").
if [[ "$num_device" != "1" ]]; then
  device_args="--num_device $num_device"
fi
out_model="${name}_${mode}_${num_device}dev.bmodel"

# Only "io_alone" is forwarded; any other (or no) value leaves addr_args as is.
if [[ "${addr_mode:-}" == "io_alone" ]]; then
  addr_args="--addr_mode io_alone"
fi
# ---- Embedding -------------------------------------------------------------
# Builds two bmodels from ../onnx/embedding.onnx (relative to the working
# directory entered below): "embedding" with the model's own input shape and
# "embedding_cache" with the input shape pinned to [1, guess_len].
outdir="${folder}/embedding"
mkdir -p "$outdir"
pushd "$outdir"

model_transform.py \
  --model_name embedding \
  --model_def ../onnx/embedding.onnx \
  --mlir embedding.mlir

# $device_args is intentionally unquoted: it is either empty or holds
# "--num_device N" and must split into separate words.
# shellcheck disable=SC2086
model_deploy.py \
  --mlir embedding.mlir \
  --quantize F16 \
  --quant_input \
  --quant_output \
  --chip bm1684x \
  $device_args \
  --model embedding.bmodel

# The shape literal is quoted so the shell never treats the brackets as a
# glob pattern.
model_transform.py \
  --model_name embedding_cache \
  --model_def ../onnx/embedding.onnx \
  --input_shapes "[[1,${guess_len}]]" \
  --mlir embedding_cache.mlir

# shellcheck disable=SC2086
model_deploy.py \
  --mlir embedding_cache.mlir \
  --quantize F16 \
  --quant_input \
  --quant_output \
  --chip bm1684x \
  $device_args \
  --model embedding_cache.bmodel

# Remove any .npz files in the working directory. -f keeps an unmatched glob
# from aborting the script under `set -e`.
rm -f -- *.npz

# Record the produced bmodels (space-separated, trailing space kept so later
# sections can append directly).
models="${models} ${outdir}/embedding.bmodel ${outdir}/embedding_cache.bmodel "
popd
echo "$models"
# ---- LM head and sampling heads --------------------------------------------
# Builds lm_head, greedy_head and penalty_sample_head from the ONNX files in
# ../../onnx (relative to the working directory entered below).
outdir="${folder}/${mode}_${num_device}dev/lm_head"
mkdir -p "$outdir"
pushd "$outdir"

model_transform.py \
  --model_name lm_head \
  --model_def ../../onnx/lm_head.onnx \
  --mlir lm_head.mlir

# $quantize_args and $device_args are intentionally unquoted: each holds zero
# or more whitespace-separated arguments that must split into words.
# shellcheck disable=SC2086
model_deploy.py \
  --mlir lm_head.mlir \
  $quantize_args \
  --quant_input \
  --chip bm1684x \
  $device_args \
  --model lm_head.bmodel

# The two sampling heads use identical steps and are deployed without
# quantize or device arguments.
for head in greedy_head penalty_sample_head; do
  model_transform.py \
    --model_name "$head" \
    --model_def "../../onnx/${head}.onnx" \
    --mlir "${head}.mlir"

  model_deploy.py \
    --mlir "${head}.mlir" \
    --chip bm1684x \
    --model "${head}.bmodel"
done

# Remove any .npz files in the working directory. -f keeps an unmatched glob
# from aborting the script under `set -e`.
rm -f -- *.npz

# Append to the space-separated model list (trailing space kept so the next
# section can append directly).
models="${models}${outdir}/lm_head.bmodel ${outdir}/greedy_head.bmodel ${outdir}/penalty_sample_head.bmodel "
popd
echo "$models"
# ---- Transformer blocks ----------------------------------------------------
# For every layer index 0..num_layers (inclusive) builds block_<i> and
# block_cache_<i> from the ONNX files in ../../onnx (relative to the working
# directory entered below), then merges all recorded bmodels into $out_model.
outdir="${folder}/${mode}_${num_device}dev/block"
mkdir -p "$outdir"
pushd "$outdir"
# No second mkdir here: $outdir is a relative path, so creating it again from
# inside the directory would only add a stray nested tree.

for ((i = 0; i <= num_layers; i++)); do
  model_transform.py \
    --model_name "block_$i" \
    --model_def "../../onnx/block_$i.onnx" \
    --mlir "block_$i.mlir"

  # $quantize_args, $device_args and $addr_args are intentionally unquoted:
  # each holds zero or more whitespace-separated arguments.
  # shellcheck disable=SC2086
  model_deploy.py \
    --mlir "block_$i.mlir" \
    $quantize_args \
    --quant_input \
    --quant_output \
    --chip bm1684x \
    $device_args \
    --model "block_$i.bmodel"

  model_transform.py \
    --model_name "block_cache_$i" \
    --model_def "../../onnx/block_cache_${i}.onnx" \
    --mlir "block_cache_$i.mlir"

  # Only the cache variant receives $addr_args.
  # shellcheck disable=SC2086
  model_deploy.py \
    --mlir "block_cache_$i.mlir" \
    $quantize_args \
    --quant_input \
    --quant_output \
    --chip bm1684x \
    $device_args \
    $addr_args \
    --model "block_cache_$i.bmodel"

  # Remove any .npz files after each layer. -f keeps an unmatched glob from
  # aborting the script under `set -e`.
  rm -f -- *.npz

  models="${models}${outdir}/block_${i}.bmodel ${outdir}/block_cache_${i}.bmodel "
done
popd
echo "$models"

# $models is a space-separated list of paths and must word-split here.
# shellcheck disable=SC2086
model_tool --combine $models -o "$out_model"