import os
from typing import Any, Dict

import torch
from transformers import AutoTokenizer, LayoutLMForSequenceClassification

# install OCR dependencies needed at runtime
os.system("sudo apt install -y tesseract-ocr")
os.system("pip3 install pytesseract==0.3.9")


class EndpointHandler():
    def __init__(self, path=""):
        # load the tokenizer and the model
        self.tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        self.model = LayoutLMForSequenceClassification.from_pretrained(
            "microsoft/layoutlm-base-uncased"
        )

    def __call__(self, data: Any) -> Dict[str, Any]:
        # example words and their normalized (0-1000) bounding boxes
        words = ["Hello", "world"]
        normalized_word_boxes = [[637, 773, 693, 782], [698, 773, 733, 782]]

        # repeat each word's box for every sub-word token it produces
        token_boxes = []
        for word, box in zip(words, normalized_word_boxes):
            word_tokens = self.tokenizer.tokenize(word)
            token_boxes.extend([box] * len(word_tokens))
        # add bounding boxes of cls + sep tokens
        token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

        encoding = self.tokenizer(" ".join(words), return_tensors="pt")
        input_ids = encoding["input_ids"]
        attention_mask = encoding["attention_mask"]
        token_type_ids = encoding["token_type_ids"]
        bbox = torch.tensor([token_boxes])
        sequence_label = torch.tensor([1])

        outputs = self.model(
            input_ids=input_ids,
            bbox=bbox,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            labels=sequence_label,
        )
        loss = outputs.loss
        logits = outputs.logits
        return {"logits": logits}
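

# A minimal local usage sketch (assumption: this block is not part of the
# original handler). It instantiates the handler and calls it with a
# placeholder payload; the "inputs" key mirrors the payload shape typically
# sent to a custom handler, but note that __call__ above ignores `data` and
# uses the hardcoded words/boxes.
if __name__ == "__main__":
    handler = EndpointHandler()
    result = handler({"inputs": "dummy payload"})
    # logits has shape [1, num_labels] for the sequence classification head
    print(result["logits"])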