# Triton Inference Server model configuration for the "deberta" model,
# served from a serialized TensorRT engine (platform "tensorrt_plan").
name: "deberta"
platform: "tensorrt_plan"

# A non-zero max_batch_size enables Triton-managed batching. Triton then
# treats the first dimension of every input/output as the batch dimension
# and prepends it implicitly, so the `dims` entries below must NOT list it.
max_batch_size: 8

# NOTE(review): dims were previously [ -1, 128 ]. Combined with
# max_batch_size > 0 that declares a rank-3 tensor [batch, -1, 128].
# This assumes the engine bindings are [batch, 128] -- confirm against the
# TensorRT plan before deploying.
input [
  {
    name: "input_ids"
    data_type: TYPE_INT32
    dims: [ 128 ]
  },
  {
    name: "attention_mask"
    data_type: TYPE_INT32
    dims: [ 128 ]
  }
]

# NOTE(review): same batch-dimension correction as the inputs; previously
# [ -1, 2 ]. Assumes the engine emits [batch, 2] -- confirm.
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 2 ]
  }
]

# Server-side dynamic batching: the scheduler tries to form batches of 4 or
# 8 requests, holding a request in the queue for at most 100 microseconds
# while waiting for a preferred size to become available.
dynamic_batching {
  preferred_batch_size: [ 4, 8 ]
  max_queue_delay_microseconds: 100
}

# Run the model on GPU with an instance count of 1. No explicit `gpus` list
# is given, so device placement is left to Triton's default.
instance_group [
  {
    count: 1
    kind: KIND_GPU
  }
]