# Triton Inference Server model configuration for the "deberta" model,
# served as a serialized TensorRT engine (platform "tensorrt_plan").
name: "deberta"
platform: "tensorrt_plan"

# A max_batch_size greater than 0 tells Triton that the model supports
# batching along the first dimension. That batch dimension is implicit:
# it must NOT be repeated in the `dims` of the inputs/outputs below.
max_batch_size: 8

input [
  {
    name: "input_ids"
    data_type: TYPE_INT32
    # Per-request shape, excluding the implicit batch dimension.
    # NOTE(review): assumes the engine binding is [batch, 128] -- confirm
    # against the TensorRT plan's optimization profile.
    dims: [ 128 ]
  },
  {
    name: "attention_mask"
    data_type: TYPE_INT32
    # Same per-request shape as input_ids (batch dimension is implicit).
    dims: [ 128 ]
  }
]

output [
  {
    name: "output"
    data_type: TYPE_FP32
    # Per-request shape, excluding the implicit batch dimension.
    # NOTE(review): assumes the engine emits [batch, 2] -- confirm.
    dims: [ 2 ]
  }
]

# Let Triton combine individual requests into larger batches server-side.
dynamic_batching {
  # Batch sizes the scheduler should try to form (both are <= max_batch_size).
  preferred_batch_size: [ 4, 8 ]
  # Upper bound, in microseconds, on how long a request may be held in the
  # queue while the scheduler waits to form a preferred batch.
  max_queue_delay_microseconds: 100
}

instance_group [
  {
    # One execution instance of the model, placed on GPU.
    count: 1
    kind: KIND_GPU
  }
]