Upload handler.py
Browse files- handler.py +5 -18
handler.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import numpy as np
|
|
|
2 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
3 |
from typing import Dict, List, Any
|
4 |
|
@@ -20,18 +21,13 @@ class EndpointHandler:
|
|
20 |
def __init__(self, path=""):
|
21 |
self.tokenizer = AutoTokenizer.from_pretrained(path)
|
22 |
self.model = AutoModelForSequenceClassification.from_pretrained(path)
|
23 |
-
self.id2label = {
|
24 |
-
i: label for i, label in enumerate(self.model.config.id2label.values())
|
25 |
-
}
|
26 |
self.MAX_LENGTH = 512 # or any other max length you prefer
|
27 |
|
28 |
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
29 |
# get inputs
|
30 |
inputs = data.pop("inputs", data)
|
31 |
|
32 |
-
encodings = self.tokenizer(
|
33 |
-
inputs, padding=False, truncation=False, return_tensors="pt"
|
34 |
-
)
|
35 |
|
36 |
truncated_input_ids = middle_truncate(
|
37 |
encodings["input_ids"][0].tolist(), self.MAX_LENGTH, self.tokenizer
|
@@ -46,21 +42,12 @@ class EndpointHandler:
|
|
46 |
"attention_mask": torch.tensor([attention_masks]),
|
47 |
}
|
48 |
|
|
|
|
|
49 |
outputs = self.model(**truncated_encodings)
|
50 |
|
51 |
# transform logits to probabilities and apply threshold
|
52 |
probs = 1 / (1 + np.exp(-outputs.logits.detach().cpu().numpy()))
|
53 |
-
predictions = (probs >= 0.5).astype(float)
|
54 |
-
|
55 |
-
# transform predicted ids into actual label names
|
56 |
-
predicted_labels = [
|
57 |
-
self.id2label[idx]
|
58 |
-
for idx, label in enumerate(predictions[0])
|
59 |
-
if label == 1.0
|
60 |
-
]
|
61 |
|
62 |
# You can return it in any format you like, here's an example:
|
63 |
-
return [
|
64 |
-
{"label": label, "score": prob}
|
65 |
-
for label, prob in zip(predicted_labels, probs[0])
|
66 |
-
]
|
|
|
1 |
import numpy as np
|
2 |
+
import torch
|
3 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
4 |
from typing import Dict, List, Any
|
5 |
|
|
|
21 |
def __init__(self, path=""):
|
22 |
self.tokenizer = AutoTokenizer.from_pretrained(path)
|
23 |
self.model = AutoModelForSequenceClassification.from_pretrained(path)
|
|
|
|
|
|
|
24 |
self.MAX_LENGTH = 512 # or any other max length you prefer
|
25 |
|
26 |
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
27 |
# get inputs
|
28 |
inputs = data.pop("inputs", data)
|
29 |
|
30 |
+
encodings = self.tokenizer(inputs, padding=False, truncation=False)
|
|
|
|
|
31 |
|
32 |
truncated_input_ids = middle_truncate(
|
33 |
encodings["input_ids"][0].tolist(), self.MAX_LENGTH, self.tokenizer
|
|
|
42 |
"attention_mask": torch.tensor([attention_masks]),
|
43 |
}
|
44 |
|
45 |
+
truncated_encodings.set_format("torch")
|
46 |
+
|
47 |
outputs = self.model(**truncated_encodings)
|
48 |
|
49 |
# transform logits to probabilities with a sigmoid (no threshold is applied; raw probabilities are returned)
|
50 |
probs = 1 / (1 + np.exp(-outputs.logits.detach().cpu().numpy()))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
# You can return it in any format you like, here's an example:
|
53 |
+
return [{"scores": probs}]
|
|
|
|
|
|