"""Dynamically quantize a fixed set of ONNX model files.

For every path listed in ``models`` the script calls
``onnxruntime.quantization.quantize_dynamic`` and writes the result next to
the input as ``<name>-quantized.onnx``.
"""
import os

from onnxruntime.quantization import quantize_dynamic, QuantType

# Model files to quantize; paths are resolved relative to the working directory.
models = [
    "encoder_model.onnx",
    "decoder_model.onnx",
    "decoder_with_past_model.onnx",
]


def _quantized_name(model_path):
    """Return the output path for *model_path*: ``<stem>-quantized.onnx``.

    The extension is split off with ``os.path.splitext`` rather than by
    slicing a fixed number of characters, so a path that does not end in
    ``.onnx`` still keeps its full stem.
    """
    stem, _ext = os.path.splitext(model_path)
    return f"{stem}-quantized.onnx"


for model in models:
    print(f"Quantizing model: {model}")
    output_model_name = _quantized_name(model)
    quantize_dynamic(
        model_input=model,
        model_output=output_model_name,
        per_channel=True,
        reduce_range=True,
        weight_type=QuantType.QUInt8,
        # NOTE(review): newer onnxruntime releases appear to have dropped the
        # `optimize_model` keyword -- confirm against the pinned version
        # before upgrading.
        optimize_model=False,
    )
    print(f"Quantized model: {output_model_name}")