serbog committed on
Commit
646ce9c
1 Parent(s): 55990e0

Upload handler.py

Browse files
Files changed (1) hide show
  1. handler.py +5 -18
handler.py CHANGED
@@ -1,4 +1,5 @@
1
  import numpy as np
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  from typing import Dict, List, Any
4
 
@@ -20,18 +21,13 @@ class EndpointHandler:
20
  def __init__(self, path=""):
21
  self.tokenizer = AutoTokenizer.from_pretrained(path)
22
  self.model = AutoModelForSequenceClassification.from_pretrained(path)
23
- self.id2label = {
24
- i: label for i, label in enumerate(self.model.config.id2label.values())
25
- }
26
  self.MAX_LENGTH = 512 # or any other max length you prefer
27
 
28
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
29
  # get inputs
30
  inputs = data.pop("inputs", data)
31
 
32
- encodings = self.tokenizer(
33
- inputs, padding=False, truncation=False, return_tensors="pt"
34
- )
35
 
36
  truncated_input_ids = middle_truncate(
37
  encodings["input_ids"][0].tolist(), self.MAX_LENGTH, self.tokenizer
@@ -46,21 +42,12 @@ class EndpointHandler:
46
  "attention_mask": torch.tensor([attention_masks]),
47
  }
48
 
 
 
49
  outputs = self.model(**truncated_encodings)
50
 
51
  # transform logits to probabilities and apply threshold
52
  probs = 1 / (1 + np.exp(-outputs.logits.detach().cpu().numpy()))
53
- predictions = (probs >= 0.5).astype(float)
54
-
55
- # transform predicted id's into actual label names
56
- predicted_labels = [
57
- self.id2label[idx]
58
- for idx, label in enumerate(predictions[0])
59
- if label == 1.0
60
- ]
61
 
62
  # You can return it in any format you like, here's an example:
63
- return [
64
- {"label": label, "score": prob}
65
- for label, prob in zip(predicted_labels, probs[0])
66
- ]
 
1
  import numpy as np
2
+ import torch
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  from typing import Dict, List, Any
5
 
 
21
  def __init__(self, path=""):
22
  self.tokenizer = AutoTokenizer.from_pretrained(path)
23
  self.model = AutoModelForSequenceClassification.from_pretrained(path)
 
 
 
24
  self.MAX_LENGTH = 512 # or any other max length you prefer
25
 
26
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
27
  # get inputs
28
  inputs = data.pop("inputs", data)
29
 
30
+ encodings = self.tokenizer(inputs, padding=False, truncation=False)
 
 
31
 
32
  truncated_input_ids = middle_truncate(
33
  encodings["input_ids"][0].tolist(), self.MAX_LENGTH, self.tokenizer
 
42
  "attention_mask": torch.tensor([attention_masks]),
43
  }
44
 
45
+ truncated_encodings.set_format("torch")
46
+
47
  outputs = self.model(**truncated_encodings)
48
 
49
  # transform logits to probabilities and apply threshold
50
  probs = 1 / (1 + np.exp(-outputs.logits.detach().cpu().numpy()))
 
 
 
 
 
 
 
 
51
 
52
  # You can return it in any format you like, here's an example:
53
+ return [{"scores": probs}]