menimeni123
commited on
Commit
·
91cf739
1
Parent(s):
47f6387
latest
Browse files- handler.py +8 -60
handler.py
CHANGED
@@ -1,90 +1,38 @@
|
|
1 |
import os
|
2 |
import torch
|
3 |
-
import math
|
4 |
from joblib import load
|
5 |
-
from transformers import BertTokenizer
|
6 |
-
from transformers.models.bert.modeling_bert import BertSelfAttention
|
7 |
-
|
8 |
-
class BertSdpaSelfAttention(BertSelfAttention):
|
9 |
-
def __init__(self, config):
|
10 |
-
super().__init__(config)
|
11 |
-
self.sdpa_head = torch.nn.Linear(config.hidden_size, config.hidden_size)
|
12 |
-
|
13 |
-
def forward(self, hidden_states, attention_mask=None, head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, past_key_value=None, output_attentions=False):
|
14 |
-
mixed_query_layer = self.query(hidden_states)
|
15 |
-
mixed_key_layer = self.key(hidden_states)
|
16 |
-
mixed_value_layer = self.value(hidden_states)
|
17 |
-
|
18 |
-
query_layer = self.transpose_for_scores(mixed_query_layer)
|
19 |
-
key_layer = self.transpose_for_scores(mixed_key_layer)
|
20 |
-
value_layer = self.transpose_for_scores(mixed_value_layer)
|
21 |
-
|
22 |
-
attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
|
23 |
-
attention_scores = attention_scores / math.sqrt(self.attention_head_size)
|
24 |
-
|
25 |
-
if attention_mask is not None:
|
26 |
-
attention_scores = attention_scores + attention_mask
|
27 |
-
|
28 |
-
attention_probs = torch.nn.functional.softmax(attention_scores, dim=-1)
|
29 |
-
attention_probs = self.dropout(attention_probs)
|
30 |
-
|
31 |
-
if head_mask is not None:
|
32 |
-
attention_probs = attention_probs * head_mask
|
33 |
-
|
34 |
-
context_layer = torch.matmul(attention_probs, value_layer)
|
35 |
-
context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
|
36 |
-
new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
|
37 |
-
context_layer = context_layer.view(*new_context_layer_shape)
|
38 |
-
|
39 |
-
sdpa_output = self.sdpa_head(context_layer)
|
40 |
-
|
41 |
-
outputs = (sdpa_output, attention_probs) if output_attentions else (sdpa_output,)
|
42 |
-
return outputs
|
43 |
|
44 |
class EndpointHandler:
|
45 |
def __init__(self, path=""):
|
46 |
-
|
47 |
-
setattr(torch.nn.modules, 'BertSdpaSelfAttention', BertSdpaSelfAttention)
|
48 |
-
|
49 |
-
# Get the directory of the current script
|
50 |
-
current_dir = os.path.dirname(os.path.abspath(__file__))
|
51 |
-
model_path = os.path.join(current_dir, "model.joblib")
|
52 |
-
|
53 |
-
self.model = self.load_model(model_path)
|
54 |
self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
55 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
56 |
self.model.to(self.device)
|
57 |
|
58 |
-
def load_model(self, model_path):
|
59 |
-
try:
|
60 |
-
globals()['BertSdpaSelfAttention'] = BertSdpaSelfAttention
|
61 |
-
if not os.path.exists(model_path):
|
62 |
-
raise FileNotFoundError(f"Model file not found: {model_path}")
|
63 |
-
model = load(model_path)
|
64 |
-
return model
|
65 |
-
except Exception as e:
|
66 |
-
raise RuntimeError(f"Error loading model: {str(e)}")
|
67 |
-
|
68 |
def __call__(self, data):
|
69 |
inputs = data.pop("inputs", data)
|
70 |
|
|
|
71 |
if isinstance(inputs, str):
|
72 |
inputs = [inputs]
|
73 |
|
|
|
74 |
encoded_inputs = self.tokenizer(inputs, padding=True, truncation=True, max_length=128, return_tensors="pt")
|
75 |
|
|
|
76 |
input_ids = encoded_inputs['input_ids'].to(self.device)
|
77 |
attention_mask = encoded_inputs['attention_mask'].to(self.device)
|
78 |
|
|
|
79 |
with torch.no_grad():
|
80 |
outputs = self.model(input_ids, attention_mask=attention_mask)
|
81 |
logits = outputs.logits
|
82 |
probabilities = torch.nn.functional.softmax(logits, dim=-1)
|
83 |
predictions = torch.argmax(probabilities, dim=-1)
|
84 |
|
|
|
85 |
class_names = ["JAILBREAK", "INJECTION", "PHISHING", "SAFE"]
|
86 |
results = [{"label": class_names[pred], "score": prob[pred].item()} for pred, prob in zip(predictions, probabilities)]
|
87 |
|
88 |
-
return {"predictions": results}
|
89 |
-
|
90 |
-
handler = EndpointHandler()
|
|
|
1 |
import os
|
2 |
import torch
|
|
|
3 |
from joblib import load
|
4 |
+
from transformers import BertTokenizer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
class EndpointHandler:
|
7 |
def __init__(self, path=""):
|
8 |
+
self.model = load(os.path.join(path, "model.joblib"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
10 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
11 |
self.model.to(self.device)
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def __call__(self, data):
|
14 |
inputs = data.pop("inputs", data)
|
15 |
|
16 |
+
# Ensure inputs is a list
|
17 |
if isinstance(inputs, str):
|
18 |
inputs = [inputs]
|
19 |
|
20 |
+
# Tokenize inputs
|
21 |
encoded_inputs = self.tokenizer(inputs, padding=True, truncation=True, max_length=128, return_tensors="pt")
|
22 |
|
23 |
+
# Move inputs to the correct device
|
24 |
input_ids = encoded_inputs['input_ids'].to(self.device)
|
25 |
attention_mask = encoded_inputs['attention_mask'].to(self.device)
|
26 |
|
27 |
+
# Perform inference
|
28 |
with torch.no_grad():
|
29 |
outputs = self.model(input_ids, attention_mask=attention_mask)
|
30 |
logits = outputs.logits
|
31 |
probabilities = torch.nn.functional.softmax(logits, dim=-1)
|
32 |
predictions = torch.argmax(probabilities, dim=-1)
|
33 |
|
34 |
+
# Convert predictions to human-readable labels
|
35 |
class_names = ["JAILBREAK", "INJECTION", "PHISHING", "SAFE"]
|
36 |
results = [{"label": class_names[pred], "score": prob[pred].item()} for pred, prob in zip(predictions, probabilities)]
|
37 |
|
38 |
+
return {"predictions": results}
|
|
|
|