from typing import Any, Dict

import os

import torch
from transformers import AutoTokenizer, LayoutLMForSequenceClassification

# Install OCR dependencies at import time (pytesseract is not used by this
# hardcoded example, but is installed for OCR-based inputs).
os.system("sudo apt install -y tesseract-ocr")
os.system("pip3 install pytesseract==0.3.9")


class EndpointHandler:
    def __init__(self, path: str = ""):
        # Load the tokenizer and model from the Hub; the local `path` argument is unused here.
        self.tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        self.model = LayoutLMForSequenceClassification.from_pretrained("microsoft/layoutlm-base-uncased")

    def __call__(self, data: Any) -> Dict[str, torch.Tensor]:
        # Hardcoded example input; the incoming request `data` is currently ignored.
        words = ["Hello", "world"]
        # Word bounding boxes normalized to LayoutLM's 0-1000 coordinate space.
        normalized_word_boxes = [[637, 773, 693, 782], [698, 773, 733, 782]]
        # Repeat each word's box for every sub-word token it is split into.
        token_boxes = []
        for word, box in zip(words, normalized_word_boxes):
            word_tokens = self.tokenizer.tokenize(word)
            token_boxes.extend([box] * len(word_tokens))
        # Add bounding boxes for the [CLS] and [SEP] special tokens.
        token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
        encoding = self.tokenizer(" ".join(words), return_tensors="pt")
        input_ids = encoding["input_ids"]
        attention_mask = encoding["attention_mask"]
        token_type_ids = encoding["token_type_ids"]
        bbox = torch.tensor([token_boxes])
        sequence_label = torch.tensor([1])
        outputs = self.model(
            input_ids=input_ids,
            bbox=bbox,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            labels=sequence_label,
        )
        # The loss is computed because a label was supplied, but only the logits are returned.
        loss = outputs.loss
        logits = outputs.logits
        return {"logits": logits}
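

# Usage sketch (not part of the original handler; assumes local execution).
# Hugging Face Inference Endpoints instantiate EndpointHandler once and call it
# per request; the payload below is illustrative only, since __call__ currently
# ignores `data` and scores the hardcoded "Hello world" example.
if __name__ == "__main__":
    handler = EndpointHandler()
    result = handler({"inputs": "Hello world"})
    print(result["logits"].shape)  # torch.Size([1, 2]) with the default two-label head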