|
model:
  name: model
  operator:
    input_data:
      type: Input
      output:
        input_ids:0:
          dtype: int32
          shape: [-1, -1]
        token_type_ids:0:
          dtype: int32
          shape: [-1, -1]
        attention_mask:0:
          dtype: int32
          shape: [-1, -1]
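
        # All remaining tensors are constants resolved from the weight binary;
        # `location` is [byte offset, byte length] (the sizes check out, e.g.
        # the 512x256 fp32 position-embedding table is 512*256*4 = 524288
        # bytes at offset 0).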
|
        bert.embeddings.position_embeddings.weight:0:
          dtype: fp32
          shape: [512, 256]
          location: [0, 524288]
        bert.embeddings.token_type_embeddings.weight:0:
          dtype: fp32
          shape: [2, 256]
          location: [524288, 2048]
        bert.embeddings.word_embeddings.weight:0:
          dtype: fp32
          shape: [30522, 256]
          location: [526336, 31254528]
        bert.embeddings.LayerNorm.weight:0:
          dtype: fp32
          shape: [256]
          location: [31780864, 1024]
        bert.embeddings.LayerNorm.bias:0:
          dtype: fp32
          shape: [256]
          location: [31781888, 1024]
        /bert/embeddings/LayerNorm/Add_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [31988776, 4]
        /bert/embeddings/LayerNorm/Add_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [31988780, 4]
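
        # Each dense op carries an s8 weight and s32 bias plus fp32 `_min`/`_max`
        # calibration ranges: per output channel (shape [256] or [1024]) for
        # weights, per tensor (shape [1]) for activations. The block below
        # repeats for encoder layers 0-3.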
|
        /bert/encoder/layer.0/attention/self/key/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [31782920, 65536]
        bert.encoder.layer.0.attention.self.key.bias:0:
          dtype: s32
          shape: [256]
          location: [31848456, 1024]
        /bert/encoder/layer.0/attention/self/key/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [31849480, 1024]
        /bert/encoder/layer.0/attention/self/key/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [31850504, 1024]
        /bert/encoder/layer.0/attention/self/Reshape_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [31988800, 4]
        /bert/encoder/layer.0/attention/self/Reshape_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [31988804, 4]
        /bert/encoder/layer.0/attention/self/query/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [31851544, 65536]
        bert.encoder.layer.0.attention.self.query.bias:0:
          dtype: s32
          shape: [256]
          location: [31917080, 1024]
        /bert/encoder/layer.0/attention/self/query/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [31918104, 1024]
        /bert/encoder/layer.0/attention/self/query/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [31919128, 1024]
        /bert/encoder/layer.0/attention/self/Reshape_2_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [31988792, 4]
        /bert/encoder/layer.0/attention/self/Reshape_2_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [31988796, 4]
        /bert/encoder/layer.0/attention/self/value/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [31920168, 65536]
        bert.encoder.layer.0.attention.self.value.bias:0:
          dtype: s32
          shape: [256]
          location: [31985704, 1024]
        /bert/encoder/layer.0/attention/self/value/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [31986728, 1024]
        /bert/encoder/layer.0/attention/self/value/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [31987752, 1024]
        /bert/encoder/layer.0/attention/self/Reshape_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [31988832, 4]
        /bert/encoder/layer.0/attention/self/Reshape_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [31988836, 4]
        /bert/encoder/layer.0/attention/self/Add_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [31988808, 4]
        /bert/encoder/layer.0/attention/self/Add_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [31988812, 4]
        /bert/encoder/layer.0/attention/self/Softmax_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [31988824, 4]
        /bert/encoder/layer.0/attention/self/Softmax_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [31988828, 4]
        /bert/encoder/layer.0/attention/self/Reshape_3_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [32057456, 4]
        /bert/encoder/layer.0/attention/self/Reshape_3_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [32057460, 4]
        /bert/encoder/layer.0/attention/output/dense/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [31988848, 65536]
        bert.encoder.layer.0.attention.output.dense.bias:0:
          dtype: s32
          shape: [256]
          location: [32054384, 1024]
        /bert/encoder/layer.0/attention/output/dense/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [32055408, 1024]
        /bert/encoder/layer.0/attention/output/dense/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [32056432, 1024]
        /bert/encoder/layer.0/attention/output/Add_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [32057464, 4]
        /bert/encoder/layer.0/attention/output/Add_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [32057468, 4]
        bert.encoder.layer.0.attention.output.LayerNorm.weight:0:
          dtype: fp32
          shape: [256]
          location: [32057472, 1024]
        bert.encoder.layer.0.attention.output.LayerNorm.bias:0:
          dtype: fp32
          shape: [256]
          location: [32058496, 1024]
        /bert/encoder/layer.0/attention/output/LayerNorm/Add_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [32333960, 4]
        /bert/encoder/layer.0/attention/output/LayerNorm/Add_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [32333964, 4]
        /bert/encoder/layer.0/intermediate/dense/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [1024, 256]
          location: [32059528, 262144]
        bert.encoder.layer.0.intermediate.dense.bias:0:
          dtype: s32
          shape: [1024]
          location: [32321672, 4096]
        /bert/encoder/layer.0/intermediate/dense/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [1024]
          location: [32325768, 4096]
        /bert/encoder/layer.0/intermediate/dense/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [1024]
          location: [32329864, 4096]
        /bert/encoder/layer.0/intermediate/intermediate_act_fn/Mul_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [32599192, 4]
        /bert/encoder/layer.0/intermediate/intermediate_act_fn/Mul_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [32599196, 4]
        /bert/encoder/layer.0/output/dense/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 1024]
          location: [32333976, 262144]
        bert.encoder.layer.0.output.dense.bias:0:
          dtype: s32
          shape: [256]
          location: [32596120, 1024]
        /bert/encoder/layer.0/output/dense/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [32597144, 1024]
        /bert/encoder/layer.0/output/dense/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [32598168, 1024]
        /bert/encoder/layer.0/output/Add_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [32599200, 4]
        /bert/encoder/layer.0/output/Add_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [32599204, 4]
        bert.encoder.layer.0.output.LayerNorm.weight:0:
          dtype: fp32
          shape: [256]
          location: [32599208, 1024]
        bert.encoder.layer.0.output.LayerNorm.bias:0:
          dtype: fp32
          shape: [256]
          location: [32600232, 1024]
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [32807120, 4]
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [32807124, 4]
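
        # encoder layer 1 -- same tensor layout as layer 0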
|
        /bert/encoder/layer.1/attention/self/key/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [32601264, 65536]
        bert.encoder.layer.1.attention.self.key.bias:0:
          dtype: s32
          shape: [256]
          location: [32666800, 1024]
        /bert/encoder/layer.1/attention/self/key/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [32667824, 1024]
        /bert/encoder/layer.1/attention/self/key/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [32668848, 1024]
        /bert/encoder/layer.1/attention/self/Reshape_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [32807144, 4]
        /bert/encoder/layer.1/attention/self/Reshape_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [32807148, 4]
        /bert/encoder/layer.1/attention/self/query/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [32669888, 65536]
        bert.encoder.layer.1.attention.self.query.bias:0:
          dtype: s32
          shape: [256]
          location: [32735424, 1024]
        /bert/encoder/layer.1/attention/self/query/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [32736448, 1024]
        /bert/encoder/layer.1/attention/self/query/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [32737472, 1024]
        /bert/encoder/layer.1/attention/self/Reshape_2_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [32807136, 4]
        /bert/encoder/layer.1/attention/self/Reshape_2_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [32807140, 4]
        /bert/encoder/layer.1/attention/self/value/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [32738512, 65536]
        bert.encoder.layer.1.attention.self.value.bias:0:
          dtype: s32
          shape: [256]
          location: [32804048, 1024]
        /bert/encoder/layer.1/attention/self/value/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [32805072, 1024]
        /bert/encoder/layer.1/attention/self/value/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [32806096, 1024]
        /bert/encoder/layer.1/attention/self/Reshape_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [32807176, 4]
        /bert/encoder/layer.1/attention/self/Reshape_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [32807180, 4]
        /bert/encoder/layer.1/attention/self/Add_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [32807152, 4]
        /bert/encoder/layer.1/attention/self/Add_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [32807156, 4]
        /bert/encoder/layer.1/attention/self/Softmax_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [32807168, 4]
        /bert/encoder/layer.1/attention/self/Softmax_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [32807172, 4]
        /bert/encoder/layer.1/attention/self/Reshape_3_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [32875800, 4]
        /bert/encoder/layer.1/attention/self/Reshape_3_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [32875804, 4]
        /bert/encoder/layer.1/attention/output/dense/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [32807192, 65536]
        bert.encoder.layer.1.attention.output.dense.bias:0:
          dtype: s32
          shape: [256]
          location: [32872728, 1024]
        /bert/encoder/layer.1/attention/output/dense/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [32873752, 1024]
        /bert/encoder/layer.1/attention/output/dense/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [32874776, 1024]
        /bert/encoder/layer.1/attention/output/Add_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [32875808, 4]
        /bert/encoder/layer.1/attention/output/Add_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [32875812, 4]
        bert.encoder.layer.1.attention.output.LayerNorm.weight:0:
          dtype: fp32
          shape: [256]
          location: [32875816, 1024]
        bert.encoder.layer.1.attention.output.LayerNorm.bias:0:
          dtype: fp32
          shape: [256]
          location: [32876840, 1024]
        /bert/encoder/layer.1/attention/output/LayerNorm/Add_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [33152304, 4]
        /bert/encoder/layer.1/attention/output/LayerNorm/Add_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [33152308, 4]
        /bert/encoder/layer.1/intermediate/dense/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [1024, 256]
          location: [32877872, 262144]
        bert.encoder.layer.1.intermediate.dense.bias:0:
          dtype: s32
          shape: [1024]
          location: [33140016, 4096]
        /bert/encoder/layer.1/intermediate/dense/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [1024]
          location: [33144112, 4096]
        /bert/encoder/layer.1/intermediate/dense/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [1024]
          location: [33148208, 4096]
        /bert/encoder/layer.1/intermediate/intermediate_act_fn/Mul_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [33417536, 4]
        /bert/encoder/layer.1/intermediate/intermediate_act_fn/Mul_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [33417540, 4]
        /bert/encoder/layer.1/output/dense/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 1024]
          location: [33152320, 262144]
        bert.encoder.layer.1.output.dense.bias:0:
          dtype: s32
          shape: [256]
          location: [33414464, 1024]
        /bert/encoder/layer.1/output/dense/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [33415488, 1024]
        /bert/encoder/layer.1/output/dense/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [33416512, 1024]
        /bert/encoder/layer.1/output/Add_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [33417544, 4]
        /bert/encoder/layer.1/output/Add_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [33417548, 4]
        bert.encoder.layer.1.output.LayerNorm.weight:0:
          dtype: fp32
          shape: [256]
          location: [33417552, 1024]
        bert.encoder.layer.1.output.LayerNorm.bias:0:
          dtype: fp32
          shape: [256]
          location: [33418576, 1024]
        /bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [33625464, 4]
        /bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [33625468, 4]
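
        # encoder layer 2 -- same tensor layout as layer 0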
|
        /bert/encoder/layer.2/attention/self/key/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [33419608, 65536]
        bert.encoder.layer.2.attention.self.key.bias:0:
          dtype: s32
          shape: [256]
          location: [33485144, 1024]
        /bert/encoder/layer.2/attention/self/key/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [33486168, 1024]
        /bert/encoder/layer.2/attention/self/key/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [33487192, 1024]
        /bert/encoder/layer.2/attention/self/Reshape_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [33625488, 4]
        /bert/encoder/layer.2/attention/self/Reshape_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [33625492, 4]
        /bert/encoder/layer.2/attention/self/query/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [33488232, 65536]
        bert.encoder.layer.2.attention.self.query.bias:0:
          dtype: s32
          shape: [256]
          location: [33553768, 1024]
        /bert/encoder/layer.2/attention/self/query/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [33554792, 1024]
        /bert/encoder/layer.2/attention/self/query/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [33555816, 1024]
        /bert/encoder/layer.2/attention/self/Reshape_2_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [33625480, 4]
        /bert/encoder/layer.2/attention/self/Reshape_2_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [33625484, 4]
        /bert/encoder/layer.2/attention/self/value/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [33556856, 65536]
        bert.encoder.layer.2.attention.self.value.bias:0:
          dtype: s32
          shape: [256]
          location: [33622392, 1024]
        /bert/encoder/layer.2/attention/self/value/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [33623416, 1024]
        /bert/encoder/layer.2/attention/self/value/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [33624440, 1024]
        /bert/encoder/layer.2/attention/self/Reshape_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [33625520, 4]
        /bert/encoder/layer.2/attention/self/Reshape_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [33625524, 4]
        /bert/encoder/layer.2/attention/self/Add_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [33625496, 4]
        /bert/encoder/layer.2/attention/self/Add_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [33625500, 4]
        /bert/encoder/layer.2/attention/self/Softmax_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [33625512, 4]
        /bert/encoder/layer.2/attention/self/Softmax_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [33625516, 4]
        /bert/encoder/layer.2/attention/self/Reshape_3_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [33694144, 4]
        /bert/encoder/layer.2/attention/self/Reshape_3_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [33694148, 4]
        /bert/encoder/layer.2/attention/output/dense/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [33625536, 65536]
        bert.encoder.layer.2.attention.output.dense.bias:0:
          dtype: s32
          shape: [256]
          location: [33691072, 1024]
        /bert/encoder/layer.2/attention/output/dense/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [33692096, 1024]
        /bert/encoder/layer.2/attention/output/dense/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [33693120, 1024]
        /bert/encoder/layer.2/attention/output/Add_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [33694152, 4]
        /bert/encoder/layer.2/attention/output/Add_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [33694156, 4]
        bert.encoder.layer.2.attention.output.LayerNorm.weight:0:
          dtype: fp32
          shape: [256]
          location: [33694160, 1024]
        bert.encoder.layer.2.attention.output.LayerNorm.bias:0:
          dtype: fp32
          shape: [256]
          location: [33695184, 1024]
        /bert/encoder/layer.2/attention/output/LayerNorm/Add_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [33970648, 4]
        /bert/encoder/layer.2/attention/output/LayerNorm/Add_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [33970652, 4]
        /bert/encoder/layer.2/intermediate/dense/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [1024, 256]
          location: [33696216, 262144]
        bert.encoder.layer.2.intermediate.dense.bias:0:
          dtype: s32
          shape: [1024]
          location: [33958360, 4096]
        /bert/encoder/layer.2/intermediate/dense/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [1024]
          location: [33962456, 4096]
        /bert/encoder/layer.2/intermediate/dense/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [1024]
          location: [33966552, 4096]
        /bert/encoder/layer.2/intermediate/intermediate_act_fn/Mul_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [34235880, 4]
        /bert/encoder/layer.2/intermediate/intermediate_act_fn/Mul_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [34235884, 4]
        /bert/encoder/layer.2/output/dense/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 1024]
          location: [33970664, 262144]
        bert.encoder.layer.2.output.dense.bias:0:
          dtype: s32
          shape: [256]
          location: [34232808, 1024]
        /bert/encoder/layer.2/output/dense/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [34233832, 1024]
        /bert/encoder/layer.2/output/dense/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [34234856, 1024]
        /bert/encoder/layer.2/output/Add_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [34235888, 4]
        /bert/encoder/layer.2/output/Add_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [34235892, 4]
        bert.encoder.layer.2.output.LayerNorm.weight:0:
          dtype: fp32
          shape: [256]
          location: [34235896, 1024]
        bert.encoder.layer.2.output.LayerNorm.bias:0:
          dtype: fp32
          shape: [256]
          location: [34236920, 1024]
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [34443808, 4]
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [34443812, 4]
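
        # encoder layer 3 -- same layout as layer 0 (no output-LayerNorm
        # min/max pair appears here)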
|
        /bert/encoder/layer.3/attention/self/key/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [34237952, 65536]
        bert.encoder.layer.3.attention.self.key.bias:0:
          dtype: s32
          shape: [256]
          location: [34303488, 1024]
        /bert/encoder/layer.3/attention/self/key/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [34304512, 1024]
        /bert/encoder/layer.3/attention/self/key/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [34305536, 1024]
        /bert/encoder/layer.3/attention/self/Reshape_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [34443832, 4]
        /bert/encoder/layer.3/attention/self/Reshape_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [34443836, 4]
        /bert/encoder/layer.3/attention/self/query/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [34306576, 65536]
        bert.encoder.layer.3.attention.self.query.bias:0:
          dtype: s32
          shape: [256]
          location: [34372112, 1024]
        /bert/encoder/layer.3/attention/self/query/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [34373136, 1024]
        /bert/encoder/layer.3/attention/self/query/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [34374160, 1024]
        /bert/encoder/layer.3/attention/self/Reshape_2_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [34443824, 4]
        /bert/encoder/layer.3/attention/self/Reshape_2_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [34443828, 4]
        /bert/encoder/layer.3/attention/self/value/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [34375200, 65536]
        bert.encoder.layer.3.attention.self.value.bias:0:
          dtype: s32
          shape: [256]
          location: [34440736, 1024]
        /bert/encoder/layer.3/attention/self/value/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [34441760, 1024]
        /bert/encoder/layer.3/attention/self/value/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [34442784, 1024]
        /bert/encoder/layer.3/attention/self/Reshape_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [34443864, 4]
        /bert/encoder/layer.3/attention/self/Reshape_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [34443868, 4]
        /bert/encoder/layer.3/attention/self/Add_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [34443840, 4]
        /bert/encoder/layer.3/attention/self/Add_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [34443844, 4]
        /bert/encoder/layer.3/attention/self/Softmax_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [34443856, 4]
        /bert/encoder/layer.3/attention/self/Softmax_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [34443860, 4]
        /bert/encoder/layer.3/attention/self/Reshape_3_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [34512488, 4]
        /bert/encoder/layer.3/attention/self/Reshape_3_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [34512492, 4]
        /bert/encoder/layer.3/attention/output/dense/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [34443880, 65536]
        bert.encoder.layer.3.attention.output.dense.bias:0:
          dtype: s32
          shape: [256]
          location: [34509416, 1024]
        /bert/encoder/layer.3/attention/output/dense/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [34510440, 1024]
        /bert/encoder/layer.3/attention/output/dense/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [34511464, 1024]
        /bert/encoder/layer.3/attention/output/Add_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [34512496, 4]
        /bert/encoder/layer.3/attention/output/Add_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [34512500, 4]
        bert.encoder.layer.3.attention.output.LayerNorm.weight:0:
          dtype: fp32
          shape: [256]
          location: [34512504, 1024]
        bert.encoder.layer.3.attention.output.LayerNorm.bias:0:
          dtype: fp32
          shape: [256]
          location: [34513528, 1024]
        /bert/encoder/layer.3/attention/output/LayerNorm/Add_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [34788992, 4]
        /bert/encoder/layer.3/attention/output/LayerNorm/Add_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [34788996, 4]
        /bert/encoder/layer.3/intermediate/dense/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [1024, 256]
          location: [34514560, 262144]
        bert.encoder.layer.3.intermediate.dense.bias:0:
          dtype: s32
          shape: [1024]
          location: [34776704, 4096]
        /bert/encoder/layer.3/intermediate/dense/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [1024]
          location: [34780800, 4096]
        /bert/encoder/layer.3/intermediate/dense/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [1024]
          location: [34784896, 4096]
        /bert/encoder/layer.3/intermediate/intermediate_act_fn/Mul_1_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [35054224, 4]
        /bert/encoder/layer.3/intermediate/intermediate_act_fn/Mul_1_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [35054228, 4]
        /bert/encoder/layer.3/output/dense/Transpose_output_0_quantized:0:
          dtype: s8
          shape: [256, 1024]
          location: [34789008, 262144]
        bert.encoder.layer.3.output.dense.bias:0:
          dtype: s32
          shape: [256]
          location: [35051152, 1024]
        /bert/encoder/layer.3/output/dense/Transpose_output_0_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [35052176, 1024]
        /bert/encoder/layer.3/output/dense/Transpose_output_0_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [35053200, 1024]
        /bert/encoder/layer.3/output/Add_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [35054232, 4]
        /bert/encoder/layer.3/output/Add_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [35054236, 4]
        bert.encoder.layer.3.output.LayerNorm.weight:0:
          dtype: fp32
          shape: [256]
          location: [35054240, 1024]
        bert.encoder.layer.3.output.LayerNorm.bias:0:
          dtype: fp32
          shape: [256]
          location: [35055264, 1024]
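
        # Pooler and classification head. `609:0` appears to be the leftover
        # ONNX node name for the final logits tensor.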
|
        /bert/pooler/Gather_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [35122856, 4]
        /bert/pooler/Gather_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [35122860, 4]
        bert.pooler.dense.weight_quantized:0:
          dtype: s8
          shape: [256, 256]
          location: [35056296, 65536]
        bert.pooler.dense.bias:0:
          dtype: s32
          shape: [256]
          location: [35121832, 1024]
        bert.pooler.dense.weight_quantized:0_min:
          dtype: fp32
          shape: [256]
          location: [35122864, 1024]
        bert.pooler.dense.weight_quantized:0_max:
          dtype: fp32
          shape: [256]
          location: [35123888, 1024]
        /bert/pooler/activation/Tanh_output_0:0_min:
          dtype: fp32
          shape: [1]
          location: [35125440, 4]
        /bert/pooler/activation/Tanh_output_0:0_max:
          dtype: fp32
          shape: [1]
          location: [35125444, 4]
        classifier.weight_quantized:0:
          dtype: s8
          shape: [256, 2]
          location: [35124920, 512]
        classifier.bias:0:
          dtype: s32
          shape: [2]
          location: [35125432, 8]
        classifier.weight_quantized:0_min:
          dtype: fp32
          shape: [2]
          location: [35125448, 8]
        classifier.weight_quantized:0_max:
          dtype: fp32
          shape: [2]
          location: [35125456, 8]
        609:0_min:
          dtype: fp32
          shape: [1]
          location: [35125464, 4]
        609:0_max:
          dtype: fp32
          shape: [1]
          location: [35125468, 4]
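
    # Execution graph. Every operator below names its input tensors (including
    # the min/max ranges its quantized kernel consumes), its outputs, and its
    # kernel attributes; fused post-ops show up as `append_op`.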
|
    position_embeddings/after/reshape:
      type: Reshape
      input:
        bert.embeddings.position_embeddings.weight:0: {}
        input_ids:0: {}
      output:
        position_embeddings/after/reshape:0: {}
      attr:
        dst_shape: 1,-1,256
        dims: 1
    /bert/embeddings/position_embeddings/Gather:
      type: Reshape
      input:
        position_embeddings/after/reshape:0: {}
      output:
        /bert/embeddings/position_embeddings/Gather_output_0:0: {}
      attr:
        dst_shape: 1,-1
    /bert/Mul:
      type: PaddingSequence
      input:
        attention_mask:0: {}
      output:
        /bert/Mul_output_0:0: {}
      attr:
        dst_shape: -1,4,0,-1
        dims: 1
    word_embeddings/reshape:
      type: Reshape
      input:
        input_ids:0: {}
      output:
        word_embeddings/reshape:0: {}
      attr:
        dst_shape: -1
    token_type_embeddings/reshape:
      type: Reshape
      input:
        token_type_ids:0: {}
      output:
        token_type_embeddings/reshape:0: {}
      attr:
        dst_shape: -1
    /bert/embeddings/token_type_embeddings/Gather:
      type: Gather
      input:
        token_type_embeddings/reshape:0: {}
        bert.embeddings.token_type_embeddings.weight:0: {}
        /bert/embeddings/position_embeddings/Gather_output_0:0: {}
        token_type_ids:0: {}
      output:
        /bert/embeddings/token_type_embeddings/Gather:0: {}
      attr:
        axis: 0
        batch_dims: 0
        append_op: binary_add
        reshape: -1,-1,256
        reshape_dims: 0,1
        mul: 1,2
    /bert/embeddings/word_embeddings/Gather:
      type: Gather
      input:
        word_embeddings/reshape:0: {}
        bert.embeddings.word_embeddings.weight:0: {}
        /bert/embeddings/token_type_embeddings/Gather:0: {}
        token_type_ids:0: {}
      output:
        embeddings_add/reshape_2d:0: {}
      attr:
        axis: 0
        batch_dims: 0
        append_op: binary_add
        reshape: -1,-1,256
        reshape_dims: 0,1
        mul: 1,2
    /bert/embeddings/LayerNorm/Add_1:
      type: LayerNorm
      input:
        embeddings_add/reshape_2d:0: {}
        bert.embeddings.LayerNorm.weight:0: {}
        bert.embeddings.LayerNorm.bias:0: {}
      output:
        /bert/embeddings/LayerNorm/Add_1_output_0:0: {}
      attr:
        epsilon: 9.999999960041972e-13
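
    # Encoder layer 0. The LayerNorm output is reordered and re-quantized to
    # u8 once, then shared by the fused Q/K/V InnerProducts. The attr
    # `reshape: 4,64,-1,-1` splits the 256-dim hidden state into 4 heads x 64
    # dims, and the QK^T Matmul's `output_scale: 0.125` matches the usual
    # 1/sqrt(64) attention scaling.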
|
    /bert/encoder/layer.0/attention/self/key/Add_quant_0_Reorder_Post_0:
      type: Reorder
      input:
        /bert/embeddings/LayerNorm/Add_1_output_0:0: {}
      output:
        /bert/embeddings/LayerNorm/Add_1_output_0:0_reorder: {}
      attr:
        src_perm: 0,1
        dst_perm: 1,0
    /bert/encoder/layer.0/attention/self/key/Add_quant_0:
      type: Quantize
      input:
        /bert/embeddings/LayerNorm/Add_1_output_0:0_reorder: {}
        /bert/embeddings/LayerNorm/Add_1_output_0:0_min: {}
        /bert/embeddings/LayerNorm/Add_1_output_0:0_max: {}
      output:
        /bert/embeddings/LayerNorm/Add_1_output_0:0_quant: {}
      attr:
        output_dtype: u8
    /bert/encoder/layer.0/attention/self/key/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.0/attention/self/key/Transpose_output_0_quantized:0: {}
        /bert/embeddings/LayerNorm/Add_1_output_0:0_quant: {}
        bert.encoder.layer.0.attention.self.key.bias:0: {}
        /bert/encoder/layer.0/attention/self/key/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.0/attention/self/key/Transpose_output_0_quantized:0_max: {}
        /bert/embeddings/LayerNorm/Add_1_output_0:0_min: {}
        /bert/embeddings/LayerNorm/Add_1_output_0:0_max: {}
        /bert/encoder/layer.0/attention/self/Reshape_output_0:0_min: {}
        /bert/encoder/layer.0/attention/self/Reshape_output_0:0_max: {}
        input_ids:0: {}
      output:
        /bert/encoder/layer.0/attention/self/Reshape_output_0:0: {}
      attr:
        output_dtype: s8
        reshape: 4,64,-1,-1
        reshape_dims: '0'
    /bert/encoder/layer.0/attention/self/query/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.0/attention/self/query/Transpose_output_0_quantized:0: {}
        /bert/embeddings/LayerNorm/Add_1_output_0:0_quant: {}
        bert.encoder.layer.0.attention.self.query.bias:0: {}
        /bert/encoder/layer.0/attention/self/query/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.0/attention/self/query/Transpose_output_0_quantized:0_max: {}
        /bert/embeddings/LayerNorm/Add_1_output_0:0_min: {}
        /bert/embeddings/LayerNorm/Add_1_output_0:0_max: {}
        /bert/encoder/layer.0/attention/self/Reshape_2_output_0:0_min: {}
        /bert/encoder/layer.0/attention/self/Reshape_2_output_0:0_max: {}
        input_ids:0: {}
      output:
        /bert/encoder/layer.0/attention/self/Reshape_2_output_0:0: {}
      attr:
        output_dtype: s8
        reshape: 4,64,-1,-1
        reshape_dims: '0'
    /bert/encoder/layer.0/attention/self/value/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.0/attention/self/value/Transpose_output_0_quantized:0: {}
        /bert/embeddings/LayerNorm/Add_1_output_0:0_quant: {}
        bert.encoder.layer.0.attention.self.value.bias:0: {}
        /bert/encoder/layer.0/attention/self/value/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.0/attention/self/value/Transpose_output_0_quantized:0_max: {}
        /bert/embeddings/LayerNorm/Add_1_output_0:0_min: {}
        /bert/embeddings/LayerNorm/Add_1_output_0:0_max: {}
        /bert/encoder/layer.0/attention/self/Reshape_1_output_0:0_min: {}
        /bert/encoder/layer.0/attention/self/Reshape_1_output_0:0_max: {}
        input_ids:0: {}
      output:
        /bert/encoder/layer.0/attention/self/Reshape_1_output_0:0: {}
      attr:
        output_dtype: s8
        reshape: 4,64,-1,-1
        reshape_dims: '0'
    /bert/encoder/layer.0/attention/self/Add:
      type: Matmul
      input:
        /bert/encoder/layer.0/attention/self/Reshape_2_output_0:0: {}
        /bert/encoder/layer.0/attention/self/Reshape_output_0:0: {}
        /bert/Mul_output_0:0: {}
        /bert/encoder/layer.0/attention/self/Reshape_2_output_0:0_min: {}
        /bert/encoder/layer.0/attention/self/Reshape_2_output_0:0_max: {}
        /bert/encoder/layer.0/attention/self/Reshape_output_0:0_min: {}
        /bert/encoder/layer.0/attention/self/Reshape_output_0:0_max: {}
        /bert/encoder/layer.0/attention/self/Add_output_0:0_min: {}
        /bert/encoder/layer.0/attention/self/Add_output_0:0_max: {}
      output:
        /bert/encoder/layer.0/attention/self/Add_output_0:0: {}
      attr:
        src0_perm: 2,0,3,1
        src1_perm: 2,0,1,3
        output_scale: 0.125
        format_any: false
        append_op: binary_add
    /bert/encoder/layer.0/attention/self/Softmax:
      type: Softmax
      input:
        /bert/encoder/layer.0/attention/self/Add_output_0:0: {}
        /bert/encoder/layer.0/attention/self/Softmax_output_0:0_min: {}
        /bert/encoder/layer.0/attention/self/Softmax_output_0:0_max: {}
      output:
        /bert/encoder/layer.0/attention/self/Softmax_output_0:0: {}
      attr:
        output_dtype: u8
    /bert/encoder/layer.0/attention/self/Transpose_3:
      type: Matmul
      input:
        /bert/encoder/layer.0/attention/self/Softmax_output_0:0: {}
        /bert/encoder/layer.0/attention/self/Reshape_1_output_0:0: {}
        /bert/encoder/layer.0/attention/self/Softmax_output_0:0_min: {}
        /bert/encoder/layer.0/attention/self/Softmax_output_0:0_max: {}
        /bert/encoder/layer.0/attention/self/Reshape_1_output_0:0_min: {}
        /bert/encoder/layer.0/attention/self/Reshape_1_output_0:0_max: {}
        /bert/encoder/layer.0/attention/self/Reshape_3_output_0:0_min: {}
        /bert/encoder/layer.0/attention/self/Reshape_3_output_0:0_max: {}
      output:
        /bert/encoder/layer.0/attention/self/Reshape_3_output_0:0: {}
      attr:
        src1_perm: 2,0,3,1
        dst_perm: 1,3,0,2
        output_dtype: u8
        reshape: 256,-1
    /bert/encoder/layer.0/attention/output/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.0/attention/output/dense/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.0/attention/self/Reshape_3_output_0:0: {}
        bert.encoder.layer.0.attention.output.dense.bias:0: {}
        /bert/embeddings/LayerNorm/Add_1_output_0:0_reorder: {}
        /bert/encoder/layer.0/attention/output/dense/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.0/attention/output/dense/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.0/attention/self/Reshape_3_output_0:0_min: {}
        /bert/encoder/layer.0/attention/self/Reshape_3_output_0:0_max: {}
        /bert/encoder/layer.0/attention/output/Add_output_0:0_min: {}
        /bert/encoder/layer.0/attention/output/Add_output_0:0_max: {}
      output:
        /bert/encoder/layer.0/attention/output/Add_output_0:0: {}
      attr:
        append_op: sum
    /bert/encoder/layer.0/attention/output/LayerNorm/Add_1:
      type: LayerNorm
      input:
        /bert/encoder/layer.0/attention/output/Add_output_0:0: {}
        bert.encoder.layer.0.attention.output.LayerNorm.weight:0: {}
        bert.encoder.layer.0.attention.output.LayerNorm.bias:0: {}
      output:
        /bert/encoder/layer.0/attention/output/LayerNorm/Add_1_output_0:0: {}
      attr:
        epsilon: 9.999999960041972e-13
        transpose_mode: 1,0
    /bert/encoder/layer.0/intermediate/intermediate_act_fn/Mul_1_quant_0:
      type: Quantize
      input:
        /bert/encoder/layer.0/attention/output/LayerNorm/Add_1_output_0:0: {}
        /bert/encoder/layer.0/attention/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.0/attention/output/LayerNorm/Add_1_output_0:0_max: {}
      output:
        /bert/encoder/layer.0/attention/output/LayerNorm/Add_1_output_0:0_quant: {}
      attr:
        output_dtype: u8
    /bert/encoder/layer.0/intermediate/intermediate_act_fn/Mul_1:
      type: InnerProduct
      input:
        /bert/encoder/layer.0/intermediate/dense/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.0/attention/output/LayerNorm/Add_1_output_0:0_quant: {}
        bert.encoder.layer.0.intermediate.dense.bias:0: {}
        /bert/encoder/layer.0/intermediate/dense/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.0/intermediate/dense/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.0/attention/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.0/attention/output/LayerNorm/Add_1_output_0:0_max: {}
        /bert/encoder/layer.0/intermediate/intermediate_act_fn/Mul_1_output_0:0_min: {}
        /bert/encoder/layer.0/intermediate/intermediate_act_fn/Mul_1_output_0:0_max: {}
      output:
        /bert/encoder/layer.0/intermediate/intermediate_act_fn/Mul_1_output_0:0: {}
      attr:
        append_op: gelu_tanh
        output_dtype: u8
    /bert/encoder/layer.0/output/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.0/output/dense/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.0/intermediate/intermediate_act_fn/Mul_1_output_0:0: {}
        bert.encoder.layer.0.output.dense.bias:0: {}
        /bert/encoder/layer.0/attention/output/LayerNorm/Add_1_output_0:0: {}
        /bert/encoder/layer.0/output/dense/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.0/output/dense/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.0/intermediate/intermediate_act_fn/Mul_1_output_0:0_min: {}
        /bert/encoder/layer.0/intermediate/intermediate_act_fn/Mul_1_output_0:0_max: {}
        /bert/encoder/layer.0/output/Add_output_0:0_min: {}
        /bert/encoder/layer.0/output/Add_output_0:0_max: {}
      output:
        /bert/encoder/layer.0/output/Add_output_0:0: {}
      attr:
        append_op: sum
    /bert/encoder/layer.0/output/LayerNorm/Add_1:
      type: LayerNorm
      input:
        /bert/encoder/layer.0/output/Add_output_0:0: {}
        bert.encoder.layer.0.output.LayerNorm.weight:0: {}
        bert.encoder.layer.0.output.LayerNorm.bias:0: {}
      output:
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0: {}
      attr:
        epsilon: 9.999999960041972e-13
        transpose_mode: 1,0
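
    # Encoder layers 1-3 repeat the layer-0 operator pattern, each
    # re-quantizing the previous layer's LayerNorm output to u8.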
|
    /bert/encoder/layer.1/attention/self/key/Add_quant_0:
      type: Quantize
      input:
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0: {}
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_max: {}
      output:
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_quant: {}
      attr:
        output_dtype: u8
    /bert/encoder/layer.1/attention/self/key/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.1/attention/self/key/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_quant: {}
        bert.encoder.layer.1.attention.self.key.bias:0: {}
        /bert/encoder/layer.1/attention/self/key/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.1/attention/self/key/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_max: {}
        /bert/encoder/layer.1/attention/self/Reshape_output_0:0_min: {}
        /bert/encoder/layer.1/attention/self/Reshape_output_0:0_max: {}
        input_ids:0: {}
      output:
        /bert/encoder/layer.1/attention/self/Reshape_output_0:0: {}
      attr:
        output_dtype: s8
        reshape: 4,64,-1,-1
        reshape_dims: '0'
    /bert/encoder/layer.1/attention/self/query/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.1/attention/self/query/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_quant: {}
        bert.encoder.layer.1.attention.self.query.bias:0: {}
        /bert/encoder/layer.1/attention/self/query/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.1/attention/self/query/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_max: {}
        /bert/encoder/layer.1/attention/self/Reshape_2_output_0:0_min: {}
        /bert/encoder/layer.1/attention/self/Reshape_2_output_0:0_max: {}
        input_ids:0: {}
      output:
        /bert/encoder/layer.1/attention/self/Reshape_2_output_0:0: {}
      attr:
        output_dtype: s8
        reshape: 4,64,-1,-1
        reshape_dims: '0'
    /bert/encoder/layer.1/attention/self/value/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.1/attention/self/value/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_quant: {}
        bert.encoder.layer.1.attention.self.value.bias:0: {}
        /bert/encoder/layer.1/attention/self/value/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.1/attention/self/value/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0_max: {}
        /bert/encoder/layer.1/attention/self/Reshape_1_output_0:0_min: {}
        /bert/encoder/layer.1/attention/self/Reshape_1_output_0:0_max: {}
        input_ids:0: {}
      output:
        /bert/encoder/layer.1/attention/self/Reshape_1_output_0:0: {}
      attr:
        output_dtype: s8
        reshape: 4,64,-1,-1
        reshape_dims: '0'
    /bert/encoder/layer.1/attention/self/Add:
      type: Matmul
      input:
        /bert/encoder/layer.1/attention/self/Reshape_2_output_0:0: {}
        /bert/encoder/layer.1/attention/self/Reshape_output_0:0: {}
        /bert/Mul_output_0:0: {}
        /bert/encoder/layer.1/attention/self/Reshape_2_output_0:0_min: {}
        /bert/encoder/layer.1/attention/self/Reshape_2_output_0:0_max: {}
        /bert/encoder/layer.1/attention/self/Reshape_output_0:0_min: {}
        /bert/encoder/layer.1/attention/self/Reshape_output_0:0_max: {}
        /bert/encoder/layer.1/attention/self/Add_output_0:0_min: {}
        /bert/encoder/layer.1/attention/self/Add_output_0:0_max: {}
      output:
        /bert/encoder/layer.1/attention/self/Add_output_0:0: {}
      attr:
        src0_perm: 2,0,3,1
        src1_perm: 2,0,1,3
        output_scale: 0.125
        format_any: false
        append_op: binary_add
    /bert/encoder/layer.1/attention/self/Softmax:
      type: Softmax
      input:
        /bert/encoder/layer.1/attention/self/Add_output_0:0: {}
        /bert/encoder/layer.1/attention/self/Softmax_output_0:0_min: {}
        /bert/encoder/layer.1/attention/self/Softmax_output_0:0_max: {}
      output:
        /bert/encoder/layer.1/attention/self/Softmax_output_0:0: {}
      attr:
        output_dtype: u8
    /bert/encoder/layer.1/attention/self/Transpose_3:
      type: Matmul
      input:
        /bert/encoder/layer.1/attention/self/Softmax_output_0:0: {}
        /bert/encoder/layer.1/attention/self/Reshape_1_output_0:0: {}
        /bert/encoder/layer.1/attention/self/Softmax_output_0:0_min: {}
        /bert/encoder/layer.1/attention/self/Softmax_output_0:0_max: {}
        /bert/encoder/layer.1/attention/self/Reshape_1_output_0:0_min: {}
        /bert/encoder/layer.1/attention/self/Reshape_1_output_0:0_max: {}
        /bert/encoder/layer.1/attention/self/Reshape_3_output_0:0_min: {}
        /bert/encoder/layer.1/attention/self/Reshape_3_output_0:0_max: {}
      output:
        /bert/encoder/layer.1/attention/self/Reshape_3_output_0:0: {}
      attr:
        src1_perm: 2,0,3,1
        dst_perm: 1,3,0,2
        output_dtype: u8
        reshape: 256,-1
    /bert/encoder/layer.1/attention/output/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.1/attention/output/dense/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.1/attention/self/Reshape_3_output_0:0: {}
        bert.encoder.layer.1.attention.output.dense.bias:0: {}
        /bert/encoder/layer.0/output/LayerNorm/Add_1_output_0:0: {}
        /bert/encoder/layer.1/attention/output/dense/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.1/attention/output/dense/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.1/attention/self/Reshape_3_output_0:0_min: {}
        /bert/encoder/layer.1/attention/self/Reshape_3_output_0:0_max: {}
        /bert/encoder/layer.1/attention/output/Add_output_0:0_min: {}
        /bert/encoder/layer.1/attention/output/Add_output_0:0_max: {}
      output:
        /bert/encoder/layer.1/attention/output/Add_output_0:0: {}
      attr:
        append_op: sum
    /bert/encoder/layer.1/attention/output/LayerNorm/Add_1:
      type: LayerNorm
      input:
        /bert/encoder/layer.1/attention/output/Add_output_0:0: {}
        bert.encoder.layer.1.attention.output.LayerNorm.weight:0: {}
        bert.encoder.layer.1.attention.output.LayerNorm.bias:0: {}
      output:
        /bert/encoder/layer.1/attention/output/LayerNorm/Add_1_output_0:0: {}
      attr:
        epsilon: 9.999999960041972e-13
        transpose_mode: 1,0
    /bert/encoder/layer.1/intermediate/intermediate_act_fn/Mul_1_quant_0:
      type: Quantize
      input:
        /bert/encoder/layer.1/attention/output/LayerNorm/Add_1_output_0:0: {}
        /bert/encoder/layer.1/attention/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.1/attention/output/LayerNorm/Add_1_output_0:0_max: {}
      output:
        /bert/encoder/layer.1/attention/output/LayerNorm/Add_1_output_0:0_quant: {}
      attr:
        output_dtype: u8
    /bert/encoder/layer.1/intermediate/intermediate_act_fn/Mul_1:
      type: InnerProduct
      input:
        /bert/encoder/layer.1/intermediate/dense/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.1/attention/output/LayerNorm/Add_1_output_0:0_quant: {}
        bert.encoder.layer.1.intermediate.dense.bias:0: {}
        /bert/encoder/layer.1/intermediate/dense/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.1/intermediate/dense/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.1/attention/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.1/attention/output/LayerNorm/Add_1_output_0:0_max: {}
        /bert/encoder/layer.1/intermediate/intermediate_act_fn/Mul_1_output_0:0_min: {}
        /bert/encoder/layer.1/intermediate/intermediate_act_fn/Mul_1_output_0:0_max: {}
      output:
        /bert/encoder/layer.1/intermediate/intermediate_act_fn/Mul_1_output_0:0: {}
      attr:
        append_op: gelu_tanh
        output_dtype: u8
    /bert/encoder/layer.1/output/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.1/output/dense/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.1/intermediate/intermediate_act_fn/Mul_1_output_0:0: {}
        bert.encoder.layer.1.output.dense.bias:0: {}
        /bert/encoder/layer.1/attention/output/LayerNorm/Add_1_output_0:0: {}
        /bert/encoder/layer.1/output/dense/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.1/output/dense/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.1/intermediate/intermediate_act_fn/Mul_1_output_0:0_min: {}
        /bert/encoder/layer.1/intermediate/intermediate_act_fn/Mul_1_output_0:0_max: {}
        /bert/encoder/layer.1/output/Add_output_0:0_min: {}
        /bert/encoder/layer.1/output/Add_output_0:0_max: {}
      output:
        /bert/encoder/layer.1/output/Add_output_0:0: {}
      attr:
        append_op: sum
    /bert/encoder/layer.1/output/LayerNorm/Add_1:
      type: LayerNorm
      input:
        /bert/encoder/layer.1/output/Add_output_0:0: {}
        bert.encoder.layer.1.output.LayerNorm.weight:0: {}
        bert.encoder.layer.1.output.LayerNorm.bias:0: {}
      output:
        /bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0: {}
      attr:
        epsilon: 9.999999960041972e-13
        transpose_mode: 1,0
|
/bert/encoder/layer.2/attention/self/key/Add_quant_0: |
|
type: Quantize |
|
input: |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0: {} |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_min: {} |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_max: {} |
|
output: |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/bert/encoder/layer.2/attention/self/key/Add: |
|
type: InnerProduct |
|
input: |
|
/bert/encoder/layer.2/attention/self/key/Transpose_output_0_quantized:0: {} |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_quant: {} |
|
bert.encoder.layer.2.attention.self.key.bias:0: {} |
|
/bert/encoder/layer.2/attention/self/key/Transpose_output_0_quantized:0_min: {} |
|
/bert/encoder/layer.2/attention/self/key/Transpose_output_0_quantized:0_max: {} |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_min: {} |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_max: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_output_0:0_min: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/bert/encoder/layer.2/attention/self/Reshape_output_0:0: {} |
|
attr: |
|
output_dtype: s8 |
|
reshape: 4,64,-1, -1 |
|
reshape_dims: '0' |
|
/bert/encoder/layer.2/attention/self/query/Add: |
|
type: InnerProduct |
|
input: |
|
/bert/encoder/layer.2/attention/self/query/Transpose_output_0_quantized:0: {} |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_quant: {} |
|
bert.encoder.layer.2.attention.self.query.bias:0: {} |
|
/bert/encoder/layer.2/attention/self/query/Transpose_output_0_quantized:0_min: {} |
|
/bert/encoder/layer.2/attention/self/query/Transpose_output_0_quantized:0_max: {} |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_min: {} |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_max: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_2_output_0:0_min: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_2_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/bert/encoder/layer.2/attention/self/Reshape_2_output_0:0: {} |
|
attr: |
|
output_dtype: s8 |
|
reshape: 4,64,-1, -1 |
|
reshape_dims: '0' |
|
/bert/encoder/layer.2/attention/self/value/Add: |
|
type: InnerProduct |
|
input: |
|
/bert/encoder/layer.2/attention/self/value/Transpose_output_0_quantized:0: {} |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_quant: {} |
|
bert.encoder.layer.2.attention.self.value.bias:0: {} |
|
/bert/encoder/layer.2/attention/self/value/Transpose_output_0_quantized:0_min: {} |
|
/bert/encoder/layer.2/attention/self/value/Transpose_output_0_quantized:0_max: {} |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_min: {} |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0_max: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_1_output_0:0_min: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_1_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/bert/encoder/layer.2/attention/self/Reshape_1_output_0:0: {} |
|
attr: |
|
output_dtype: s8 |
|
reshape: 4,64,-1, -1 |
|
reshape_dims: '0' |
|
/bert/encoder/layer.2/attention/self/Add: |
|
type: Matmul |
|
input: |
|
/bert/encoder/layer.2/attention/self/Reshape_2_output_0:0: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_output_0:0: {} |
|
/bert/Mul_output_0:0: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_2_output_0:0_min: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_2_output_0:0_max: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_output_0:0_min: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_output_0:0_max: {} |
|
/bert/encoder/layer.2/attention/self/Add_output_0:0_min: {} |
|
/bert/encoder/layer.2/attention/self/Add_output_0:0_max: {} |
|
output: |
|
/bert/encoder/layer.2/attention/self/Add_output_0:0: {} |
|
attr: |
|
src0_perm: 2,0,3,1 |
|
src1_perm: 2,0,1,3 |
|
output_scale: 0.125 |
|
format_any: false |
|
append_op: binary_add |
|
/bert/encoder/layer.2/attention/self/Softmax: |
|
type: Softmax |
|
input: |
|
/bert/encoder/layer.2/attention/self/Add_output_0:0: {} |
|
/bert/encoder/layer.2/attention/self/Softmax_output_0:0_min: {} |
|
/bert/encoder/layer.2/attention/self/Softmax_output_0:0_max: {} |
|
output: |
|
/bert/encoder/layer.2/attention/self/Softmax_output_0:0: {} |
|
attr: |
|
output_dtype: u8 |
|
/bert/encoder/layer.2/attention/self/Transpose_3: |
|
type: Matmul |
|
input: |
|
/bert/encoder/layer.2/attention/self/Softmax_output_0:0: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_1_output_0:0: {} |
|
/bert/encoder/layer.2/attention/self/Softmax_output_0:0_min: {} |
|
/bert/encoder/layer.2/attention/self/Softmax_output_0:0_max: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_1_output_0:0_min: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_1_output_0:0_max: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_3_output_0:0_min: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_3_output_0:0_max: {} |
|
output: |
|
/bert/encoder/layer.2/attention/self/Reshape_3_output_0:0: {} |
|
attr: |
|
src1_perm: 2,0,3,1 |
|
dst_perm: 1,3,0,2 |
|
output_dtype: u8 |
|
reshape: 256,-1 |
|
/bert/encoder/layer.2/attention/output/Add: |
|
type: InnerProduct |
|
input: |
|
/bert/encoder/layer.2/attention/output/dense/Transpose_output_0_quantized:0: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_3_output_0:0: {} |
|
bert.encoder.layer.2.attention.output.dense.bias:0: {} |
|
/bert/encoder/layer.1/output/LayerNorm/Add_1_output_0:0: {} |
|
/bert/encoder/layer.2/attention/output/dense/Transpose_output_0_quantized:0_min: {} |
|
/bert/encoder/layer.2/attention/output/dense/Transpose_output_0_quantized:0_max: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_3_output_0:0_min: {} |
|
/bert/encoder/layer.2/attention/self/Reshape_3_output_0:0_max: {} |
|
/bert/encoder/layer.2/attention/output/Add_output_0:0_min: {} |
|
/bert/encoder/layer.2/attention/output/Add_output_0:0_max: {} |
|
output: |
|
/bert/encoder/layer.2/attention/output/Add_output_0:0: {} |
|
attr: |
|
append_op: sum |
|
/bert/encoder/layer.2/attention/output/LayerNorm/Add_1: |
|
type: LayerNorm |
|
input: |
|
/bert/encoder/layer.2/attention/output/Add_output_0:0: {} |
|
bert.encoder.layer.2.attention.output.LayerNorm.weight:0: {} |
|
bert.encoder.layer.2.attention.output.LayerNorm.bias:0: {} |
|
output: |
|
/bert/encoder/layer.2/attention/output/LayerNorm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
transpose_mode: 1, 0 |
    /bert/encoder/layer.2/intermediate/intermediate_act_fn/Mul_1_quant_0:
      type: Quantize
      input:
        /bert/encoder/layer.2/attention/output/LayerNorm/Add_1_output_0:0: {}
        /bert/encoder/layer.2/attention/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.2/attention/output/LayerNorm/Add_1_output_0:0_max: {}
      output:
        /bert/encoder/layer.2/attention/output/LayerNorm/Add_1_output_0:0_quant: {}
      attr:
        output_dtype: u8
    /bert/encoder/layer.2/intermediate/intermediate_act_fn/Mul_1:
      type: InnerProduct
      input:
        /bert/encoder/layer.2/intermediate/dense/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.2/attention/output/LayerNorm/Add_1_output_0:0_quant: {}
        bert.encoder.layer.2.intermediate.dense.bias:0: {}
        /bert/encoder/layer.2/intermediate/dense/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.2/intermediate/dense/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.2/attention/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.2/attention/output/LayerNorm/Add_1_output_0:0_max: {}
        /bert/encoder/layer.2/intermediate/intermediate_act_fn/Mul_1_output_0:0_min: {}
        /bert/encoder/layer.2/intermediate/intermediate_act_fn/Mul_1_output_0:0_max: {}
      output:
        /bert/encoder/layer.2/intermediate/intermediate_act_fn/Mul_1_output_0:0: {}
      attr:
        append_op: gelu_tanh
        output_dtype: u8
    /bert/encoder/layer.2/output/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.2/output/dense/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.2/intermediate/intermediate_act_fn/Mul_1_output_0:0: {}
        bert.encoder.layer.2.output.dense.bias:0: {}
        /bert/encoder/layer.2/attention/output/LayerNorm/Add_1_output_0:0: {}
        /bert/encoder/layer.2/output/dense/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.2/output/dense/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.2/intermediate/intermediate_act_fn/Mul_1_output_0:0_min: {}
        /bert/encoder/layer.2/intermediate/intermediate_act_fn/Mul_1_output_0:0_max: {}
        /bert/encoder/layer.2/output/Add_output_0:0_min: {}
        /bert/encoder/layer.2/output/Add_output_0:0_max: {}
      output:
        /bert/encoder/layer.2/output/Add_output_0:0: {}
      attr:
        append_op: sum
    /bert/encoder/layer.2/output/LayerNorm/Add_1:
      type: LayerNorm
      input:
        /bert/encoder/layer.2/output/Add_output_0:0: {}
        bert.encoder.layer.2.output.LayerNorm.weight:0: {}
        bert.encoder.layer.2.output.LayerNorm.bias:0: {}
      output:
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0: {}
      attr:
        epsilon: 9.999999960041972e-13
        transpose_mode: 1, 0
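    # Encoder layer 3 repeats the per-layer pattern: quantize the previous
    # LayerNorm output to u8, then compute Q/K/V with s8 InnerProducts whose
    # outputs are reshaped to 4 heads x 64 dims (hidden size 256). The score
    # Matmul's output_scale of 0.125 matches the usual 1/sqrt(d_head) attention
    # scaling for 64-dim heads, and the attention mask (/bert/Mul_output_0:0)
    # is fused in via `append_op: binary_add` before the Softmax.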
    /bert/encoder/layer.3/attention/self/key/Add_quant_0:
      type: Quantize
      input:
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0: {}
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_max: {}
      output:
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_quant: {}
      attr:
        output_dtype: u8
    /bert/encoder/layer.3/attention/self/key/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.3/attention/self/key/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_quant: {}
        bert.encoder.layer.3.attention.self.key.bias:0: {}
        /bert/encoder/layer.3/attention/self/key/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.3/attention/self/key/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_max: {}
        /bert/encoder/layer.3/attention/self/Reshape_output_0:0_min: {}
        /bert/encoder/layer.3/attention/self/Reshape_output_0:0_max: {}
        input_ids:0: {}
      output:
        /bert/encoder/layer.3/attention/self/Reshape_output_0:0: {}
      attr:
        output_dtype: s8
        reshape: 4,64,-1,-1
        reshape_dims: '0'
    /bert/encoder/layer.3/attention/self/query/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.3/attention/self/query/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_quant: {}
        bert.encoder.layer.3.attention.self.query.bias:0: {}
        /bert/encoder/layer.3/attention/self/query/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.3/attention/self/query/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_max: {}
        /bert/encoder/layer.3/attention/self/Reshape_2_output_0:0_min: {}
        /bert/encoder/layer.3/attention/self/Reshape_2_output_0:0_max: {}
        input_ids:0: {}
      output:
        /bert/encoder/layer.3/attention/self/Reshape_2_output_0:0: {}
      attr:
        output_dtype: s8
        reshape: 4,64,-1,-1
        reshape_dims: '0'
    /bert/encoder/layer.3/attention/self/value/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.3/attention/self/value/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_quant: {}
        bert.encoder.layer.3.attention.self.value.bias:0: {}
        /bert/encoder/layer.3/attention/self/value/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.3/attention/self/value/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0_max: {}
        /bert/encoder/layer.3/attention/self/Reshape_1_output_0:0_min: {}
        /bert/encoder/layer.3/attention/self/Reshape_1_output_0:0_max: {}
        input_ids:0: {}
      output:
        /bert/encoder/layer.3/attention/self/Reshape_1_output_0:0: {}
      attr:
        output_dtype: s8
        reshape: 4,64,-1,-1
        reshape_dims: '0'
    /bert/encoder/layer.3/attention/self/Add:
      type: Matmul
      input:
        /bert/encoder/layer.3/attention/self/Reshape_2_output_0:0: {}
        /bert/encoder/layer.3/attention/self/Reshape_output_0:0: {}
        /bert/Mul_output_0:0: {}
        /bert/encoder/layer.3/attention/self/Reshape_2_output_0:0_min: {}
        /bert/encoder/layer.3/attention/self/Reshape_2_output_0:0_max: {}
        /bert/encoder/layer.3/attention/self/Reshape_output_0:0_min: {}
        /bert/encoder/layer.3/attention/self/Reshape_output_0:0_max: {}
        /bert/encoder/layer.3/attention/self/Add_output_0:0_min: {}
        /bert/encoder/layer.3/attention/self/Add_output_0:0_max: {}
      output:
        /bert/encoder/layer.3/attention/self/Add_output_0:0: {}
      attr:
        src0_perm: 2,0,3,1
        src1_perm: 2,0,1,3
        output_scale: 0.125
        format_any: false
        append_op: binary_add
    /bert/encoder/layer.3/attention/self/Softmax:
      type: Softmax
      input:
        /bert/encoder/layer.3/attention/self/Add_output_0:0: {}
        /bert/encoder/layer.3/attention/self/Softmax_output_0:0_min: {}
        /bert/encoder/layer.3/attention/self/Softmax_output_0:0_max: {}
      output:
        /bert/encoder/layer.3/attention/self/Softmax_output_0:0: {}
      attr:
        output_dtype: u8
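    # Probabilities x V: the context Matmul permutes its operands back together
    # and flattens the result with `reshape: 256,-1` (hidden dim first,
    # apparently with tokens in the trailing dim) for the following projection.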
    /bert/encoder/layer.3/attention/self/Transpose_3:
      type: Matmul
      input:
        /bert/encoder/layer.3/attention/self/Softmax_output_0:0: {}
        /bert/encoder/layer.3/attention/self/Reshape_1_output_0:0: {}
        /bert/encoder/layer.3/attention/self/Softmax_output_0:0_min: {}
        /bert/encoder/layer.3/attention/self/Softmax_output_0:0_max: {}
        /bert/encoder/layer.3/attention/self/Reshape_1_output_0:0_min: {}
        /bert/encoder/layer.3/attention/self/Reshape_1_output_0:0_max: {}
        /bert/encoder/layer.3/attention/self/Reshape_3_output_0:0_min: {}
        /bert/encoder/layer.3/attention/self/Reshape_3_output_0:0_max: {}
      output:
        /bert/encoder/layer.3/attention/self/Reshape_3_output_0:0: {}
      attr:
        src1_perm: 2,0,3,1
        dst_perm: 1,3,0,2
        output_dtype: u8
        reshape: 256,-1
    /bert/encoder/layer.3/attention/output/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.3/attention/output/dense/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.3/attention/self/Reshape_3_output_0:0: {}
        bert.encoder.layer.3.attention.output.dense.bias:0: {}
        /bert/encoder/layer.2/output/LayerNorm/Add_1_output_0:0: {}
        /bert/encoder/layer.3/attention/output/dense/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.3/attention/output/dense/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.3/attention/self/Reshape_3_output_0:0_min: {}
        /bert/encoder/layer.3/attention/self/Reshape_3_output_0:0_max: {}
        /bert/encoder/layer.3/attention/output/Add_output_0:0_min: {}
        /bert/encoder/layer.3/attention/output/Add_output_0:0_max: {}
      output:
        /bert/encoder/layer.3/attention/output/Add_output_0:0: {}
      attr:
        append_op: sum
    /bert/encoder/layer.3/attention/output/LayerNorm/Add_1:
      type: LayerNorm
      input:
        /bert/encoder/layer.3/attention/output/Add_output_0:0: {}
        bert.encoder.layer.3.attention.output.LayerNorm.weight:0: {}
        bert.encoder.layer.3.attention.output.LayerNorm.bias:0: {}
      output:
        /bert/encoder/layer.3/attention/output/LayerNorm/Add_1_output_0:0: {}
      attr:
        epsilon: 9.999999960041972e-13
        transpose_mode: 1, 0
    /bert/encoder/layer.3/intermediate/intermediate_act_fn/Mul_1_quant_0:
      type: Quantize
      input:
        /bert/encoder/layer.3/attention/output/LayerNorm/Add_1_output_0:0: {}
        /bert/encoder/layer.3/attention/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.3/attention/output/LayerNorm/Add_1_output_0:0_max: {}
      output:
        /bert/encoder/layer.3/attention/output/LayerNorm/Add_1_output_0:0_quant: {}
      attr:
        output_dtype: u8
    /bert/encoder/layer.3/intermediate/intermediate_act_fn/Mul_1:
      type: InnerProduct
      input:
        /bert/encoder/layer.3/intermediate/dense/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.3/attention/output/LayerNorm/Add_1_output_0:0_quant: {}
        bert.encoder.layer.3.intermediate.dense.bias:0: {}
        /bert/encoder/layer.3/intermediate/dense/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.3/intermediate/dense/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.3/attention/output/LayerNorm/Add_1_output_0:0_min: {}
        /bert/encoder/layer.3/attention/output/LayerNorm/Add_1_output_0:0_max: {}
        /bert/encoder/layer.3/intermediate/intermediate_act_fn/Mul_1_output_0:0_min: {}
        /bert/encoder/layer.3/intermediate/intermediate_act_fn/Mul_1_output_0:0_max: {}
      output:
        /bert/encoder/layer.3/intermediate/intermediate_act_fn/Mul_1_output_0:0: {}
      attr:
        append_op: gelu_tanh
        output_dtype: u8
    /bert/encoder/layer.3/output/Add:
      type: InnerProduct
      input:
        /bert/encoder/layer.3/output/dense/Transpose_output_0_quantized:0: {}
        /bert/encoder/layer.3/intermediate/intermediate_act_fn/Mul_1_output_0:0: {}
        bert.encoder.layer.3.output.dense.bias:0: {}
        /bert/encoder/layer.3/attention/output/LayerNorm/Add_1_output_0:0: {}
        /bert/encoder/layer.3/output/dense/Transpose_output_0_quantized:0_min: {}
        /bert/encoder/layer.3/output/dense/Transpose_output_0_quantized:0_max: {}
        /bert/encoder/layer.3/intermediate/intermediate_act_fn/Mul_1_output_0:0_min: {}
        /bert/encoder/layer.3/intermediate/intermediate_act_fn/Mul_1_output_0:0_max: {}
        /bert/encoder/layer.3/output/Add_output_0:0_min: {}
        /bert/encoder/layer.3/output/Add_output_0:0_max: {}
      output:
        /bert/encoder/layer.3/output/Add_output_0:0: {}
      attr:
        append_op: sum
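    # Being the final encoder layer, layer 3 inserts a Reorder (0,1 -> 1,0
    # transpose) that appears to undo the transposed activation layout kept
    # inside the encoder; its last LayerNorm accordingly carries no
    # transpose_mode attribute.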
    /bert/encoder/layer.3/output/Add_Reorder_Recover:
      type: Reorder
      input:
        /bert/encoder/layer.3/output/Add_output_0:0: {}
      output:
        /bert/encoder/layer.3/output/Add_output_0:0_recover: {}
      attr:
        src_perm: 0,1
        dst_perm: 1,0
    /bert/encoder/layer.3/output/LayerNorm/Add_1:
      type: LayerNorm
      input:
        /bert/encoder/layer.3/output/Add_output_0:0_recover: {}
        bert.encoder.layer.3.output.LayerNorm.weight:0: {}
        bert.encoder.layer.3.output.LayerNorm.bias:0: {}
      output:
        /bert/encoder/layer.3/output/LayerNorm/Add_1:0: {}
      attr:
        epsilon: 9.999999960041972e-13
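    # Post-encoder head: restore the [batch, seq, 256] shape, then slice out
    # the first token. With begin_mask/end_mask of 5 (binary 101, so axes 0
    # and 2 take their full range), the StridedSlice keeps only sequence
    # position 0, i.e. the [CLS] token fed to the pooler.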
    last_layer_reshape:
      type: Reshape
      input:
        /bert/encoder/layer.3/output/LayerNorm/Add_1:0: {}
        input_ids:0: {}
      output:
        last_layer_reshape:0: {}
      attr:
        dst_shape: -1,-1,256
        dims: '0'
    last_layer_strided_slice:
      type: StridedSlice
      input:
        last_layer_reshape:0: {}
      output:
        last_layer_strided_slice:0: {}
      attr:
        begin_mask: 5
        ellipsis_mask: 0
        end_mask: 5
        new_axis_mask: 0
        shrink_axis_mask: 0
        begin: 0,0,0
        end: 0,1,0
        strides: 1,1,1
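    # Pooler: the [CLS] slice is flattened to [-1, 256], quantized to u8, and
    # passed through the pooler dense layer with a fused tanh activation.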
    /bert/pooler/Gather:
      type: Reshape
      input:
        last_layer_strided_slice:0: {}
      output:
        /bert/pooler/Gather_output_0:0: {}
      attr:
        dst_shape: -1,256
    /bert/pooler/activation/Tanh_quant_0:
      type: Quantize
      input:
        /bert/pooler/Gather_output_0:0: {}
        /bert/pooler/Gather_output_0:0_min: {}
        /bert/pooler/Gather_output_0:0_max: {}
      output:
        /bert/pooler/Gather_output_0:0_quant: {}
      attr:
        output_dtype: u8
    /bert/pooler/activation/Tanh:
      type: InnerProduct
      input:
        /bert/pooler/Gather_output_0:0_quant: {}
        bert.pooler.dense.weight_quantized:0: {}
        bert.pooler.dense.bias:0: {}
        /bert/pooler/Gather_output_0:0_min: {}
        /bert/pooler/Gather_output_0:0_max: {}
        bert.pooler.dense.weight_quantized:0_min: {}
        bert.pooler.dense.weight_quantized:0_max: {}
        /bert/pooler/activation/Tanh_output_0:0_min: {}
        /bert/pooler/activation/Tanh_output_0:0_max: {}
      output:
        /bert/pooler/activation/Tanh_output_0:0: {}
      attr:
        src1_perm: 1,0
        append_op: tanh
        output_dtype: u8
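    # Classification head: an int8 Gemm over the pooled output produces the
    # logits tensor '609:0', which the Output node exposes as the model result.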
    /classifier/Gemm_Add:
      type: InnerProduct
      input:
        /bert/pooler/activation/Tanh_output_0:0: {}
        classifier.weight_quantized:0: {}
        classifier.bias:0: {}
        /bert/pooler/activation/Tanh_output_0:0_min: {}
        /bert/pooler/activation/Tanh_output_0:0_max: {}
        classifier.weight_quantized:0_min: {}
        classifier.weight_quantized:0_max: {}
        609:0_min: {}
        609:0_max: {}
      output:
        '609:0': {}
      attr:
        src1_perm: 1,0
    output_data:
      type: Output
      input:
        '609:0': {}