philschmid (HF staff) committed
Commit 75a111d
1 Parent(s): 3d3b502

Update handler.py

Files changed (1):
  handler.py +33 -27
handler.py CHANGED
@@ -1,34 +1,40 @@
 from typing import Dict, List, Any
-from optimum.onnxruntime import ORTModelForSequenceClassification
-from transformers import pipeline, AutoTokenizer
-
+from transformers import AutoTokenizer, LayoutLMForSequenceClassification
+import torch
 
 class EndpointHandler():
     def __init__(self, path=""):
-        # load the optimized model
-        model = ORTModelForSequenceClassification.from_pretrained(path)
-        tokenizer = AutoTokenizer.from_pretrained(path)
-        # create inference pipeline
-        self.pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
+        self.tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
+        # load the LayoutLM model (no ONNX optimization in this revision)
+        self.model = LayoutLMForSequenceClassification.from_pretrained("microsoft/layoutlm-base-uncased")
 
-    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
-        """
-        Args:
-            data (:obj:`dict`):
-                includes the input data and the parameters for the inference.
-        Return:
-            A :obj:`list`. The object returned should be a list of one list like [[{"label": "LABEL_0", "score": 0.9939950108528137}]] containing:
-            - "label": A string representing what the label/class is. There can be multiple labels.
-            - "score": A score between 0 and 1 describing how confident the model is for this label/class.
-        """
-        inputs = data.pop("inputs", data)
-        parameters = data.pop("parameters", None)
-
-        # pass inputs with all kwargs in data
-        if parameters is not None:
-            prediction = self.pipeline(inputs, **parameters)
-        else:
-            prediction = self.pipeline(inputs)
-        # postprocess the prediction
-        return prediction
+    def __call__(self, data: Any) -> Dict[str, Any]:
+        # NOTE: inputs are hardcoded in this revision; `data` is not used yet
+        words = ["Hello", "world"]
+        normalized_word_boxes = [[637, 773, 693, 782], [698, 773, 733, 782]]
+
+        # repeat each word-level box for every sub-word token of that word
+        token_boxes = []
+        for word, box in zip(words, normalized_word_boxes):
+            word_tokens = self.tokenizer.tokenize(word)
+            token_boxes.extend([box] * len(word_tokens))
+        # add bounding boxes of cls + sep tokens
+        token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
+
+        encoding = self.tokenizer(" ".join(words), return_tensors="pt")
+        input_ids = encoding["input_ids"]
+        attention_mask = encoding["attention_mask"]
+        token_type_ids = encoding["token_type_ids"]
+        bbox = torch.tensor([token_boxes])
+        sequence_label = torch.tensor([1])
+
+        outputs = self.model(
+            input_ids=input_ids,
+            bbox=bbox,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+            labels=sequence_label,
+        )
+
+        # passing labels makes the model also compute a loss; only logits are served
+        loss = outputs.loss
+        logits = outputs.logits
+        return {"logits": logits}
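
For context on the bounding-box values above: LayoutLM expects each box as (x0, y0, x1, y1) normalized to a 0-1000 range relative to the page size. A minimal sketch of that normalization, assuming pixel coordinates and illustrative page dimensions (the helper name is not part of this commit):

def normalize_box(box, page_width, page_height):
    # scale pixel coordinates (x0, y0, x1, y1) into LayoutLM's 0-1000 range
    x0, y0, x1, y1 = box
    return [
        int(1000 * x0 / page_width),
        int(1000 * y0 / page_height),
        int(1000 * x1 / page_width),
        int(1000 * y1 / page_height),
    ]

# e.g. a word box from a 612x792 pt page
print(normalize_box((390, 612, 424, 619), 612, 792))  # -> [637, 772, 692, 781]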
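
And a minimal local smoke test for the new handler, assuming handler.py is importable from the working directory. The payload follows the usual custom-handler convention with an "inputs" key, although this revision ignores it and classifies the hardcoded words; weights are downloaded from the Hub on first run:

from handler import EndpointHandler

handler = EndpointHandler(path=".")
result = handler({"inputs": "Hello world"})
print(result["logits"].shape)  # torch.Size([1, 2]) with the default 2-label head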