""" |
|
This module is copy-pasted in generated Triton configuration folder to perform the tokenization step. |
|
""" |

from pathlib import Path
from typing import Dict, List

import numpy as np

try:
    # triton_python_backend_utils is only provided inside the Triton server
    # Python backend runtime; the import fails for any local, offline use.
    import triton_python_backend_utils as pb_utils
except ImportError:
    pass

from transformers import AutoTokenizer, BatchEncoding, PreTrainedTokenizer, TensorType


class TritonPythonModel:
    tokenizer: PreTrainedTokenizer

    def initialize(self, args: Dict[str, str]) -> None:
        """
        Initialize the tokenization process.
        :param args: arguments from the Triton config file
        """
        # Triton provides the path to this model's folder; the tokenizer files
        # are expected to be saved one directory above it.
        path: str = str(Path(args["model_repository"]).parent.absolute())
        self.tokenizer = AutoTokenizer.from_pretrained(path)

    def execute(self, requests) -> "List[pb_utils.InferenceResponse]":
        """
        Parse and tokenize each request.
        :param requests: 1 or more requests received by the Triton server
        :return: one response per request, carrying the tokenized text as input tensors
        """
        responses = []
        for request in requests:
            # Decode the raw bytes of the TEXT input into Python strings.
            query = [t.decode("UTF-8") for t in pb_utils.get_input_tensor_by_name(request, "TEXT").as_numpy().tolist()]
            # Pad to a multiple of 8 so tensor shapes stay friendly to GPU kernels.
            tokens: BatchEncoding = self.tokenizer(
                text=query, return_tensors=TensorType.NUMPY, padding=True, pad_to_multiple_of=8
            )
            # Cast to int32 to match the input dtype declared for the downstream model.
            tokens_dict = {k: v.astype(np.int32) for k, v in tokens.items()}
            # Emit one tensor per model input (e.g. input_ids, attention_mask).
            outputs = list()
            for input_name in self.tokenizer.model_input_names:
                tensor_input = pb_utils.Tensor(input_name, tokens_dict[input_name])
                outputs.append(tensor_input)
            inference_response = pb_utils.InferenceResponse(output_tensors=outputs)
            responses.append(inference_response)
        return responses
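

# Minimal local smoke test, a sketch for running outside Triton (pb_utils is not
# needed here). The checkpoint "bert-base-uncased" is an illustrative assumption;
# the deployed model loads its tokenizer from the model repository instead.
if __name__ == "__main__":
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    encoding: BatchEncoding = tokenizer(
        text=["hello world", "a longer sentence to demonstrate padding"],
        return_tensors=TensorType.NUMPY,
        padding=True,
        pad_to_multiple_of=8,
    )
    for name, array in encoding.items():
        # Sequence length is padded up to a multiple of 8.
        print(name, array.astype(np.int32).shape)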