menimeni123
/

helem-llm

Joblib

Safetensors

distilbert

Inference Endpoints

Model card Files Files and versions Community

menimeni123 commited on Sep 12, 2024

Commit

91cf739

1 Parent(s): 47f6387

latest

Browse files

Files changed (1) hide show

handler.py +8 -60

handler.py CHANGED Viewed

@@ -1,90 +1,38 @@
 import os
 import torch
-import math
 from joblib import load
-from transformers import BertTokenizer, BertModel
-from transformers.models.bert.modeling_bert import BertSelfAttention
-class BertSdpaSelfAttention(BertSelfAttention):
-    def __init__(self, config):
-        super().__init__(config)
-        self.sdpa_head = torch.nn.Linear(config.hidden_size, config.hidden_size)
-    def forward(self, hidden_states, attention_mask=None, head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, past_key_value=None, output_attentions=False):
-        mixed_query_layer = self.query(hidden_states)
-        mixed_key_layer = self.key(hidden_states)
-        mixed_value_layer = self.value(hidden_states)
-        query_layer = self.transpose_for_scores(mixed_query_layer)
-        key_layer = self.transpose_for_scores(mixed_key_layer)
-        value_layer = self.transpose_for_scores(mixed_value_layer)
-        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
-        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
-        if attention_mask is not None:
-            attention_scores = attention_scores + attention_mask
-        attention_probs = torch.nn.functional.softmax(attention_scores, dim=-1)
-        attention_probs = self.dropout(attention_probs)
-        if head_mask is not None:
-            attention_probs = attention_probs * head_mask
-        context_layer = torch.matmul(attention_probs, value_layer)
-        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
-        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
-        context_layer = context_layer.view(*new_context_layer_shape)
-        sdpa_output = self.sdpa_head(context_layer)
-        outputs = (sdpa_output, attention_probs) if output_attentions else (sdpa_output,)
-        return outputs
 class EndpointHandler:
     def __init__(self, path=""):
-        # Register the custom class
-        setattr(torch.nn.modules, 'BertSdpaSelfAttention', BertSdpaSelfAttention)
-        # Get the directory of the current script
-        current_dir = os.path.dirname(os.path.abspath(__file__))
-        model_path = os.path.join(current_dir, "model.joblib")
-        self.model = self.load_model(model_path)
         self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model.to(self.device)
-    def load_model(self, model_path):
-        try:
-            globals()['BertSdpaSelfAttention'] = BertSdpaSelfAttention
-            if not os.path.exists(model_path):
-                raise FileNotFoundError(f"Model file not found: {model_path}")
-            model = load(model_path)
-            return model
-        except Exception as e:
-            raise RuntimeError(f"Error loading model: {str(e)}")
     def __call__(self, data):
         inputs = data.pop("inputs", data)
         if isinstance(inputs, str):
             inputs = [inputs]
         encoded_inputs = self.tokenizer(inputs, padding=True, truncation=True, max_length=128, return_tensors="pt")
         input_ids = encoded_inputs['input_ids'].to(self.device)
         attention_mask = encoded_inputs['attention_mask'].to(self.device)
         with torch.no_grad():
             outputs = self.model(input_ids, attention_mask=attention_mask)
             logits = outputs.logits
             probabilities = torch.nn.functional.softmax(logits, dim=-1)
             predictions = torch.argmax(probabilities, dim=-1)
         class_names = ["JAILBREAK", "INJECTION", "PHISHING", "SAFE"]
         results = [{"label": class_names[pred], "score": prob[pred].item()} for pred, prob in zip(predictions, probabilities)]
-        return {"predictions": results}
-handler = EndpointHandler()

 import os
 import torch
 from joblib import load
+from transformers import BertTokenizer
 class EndpointHandler:
     def __init__(self, path=""):
+        self.model = load(os.path.join(path, "model.joblib"))
         self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model.to(self.device)
     def __call__(self, data):
         inputs = data.pop("inputs", data)
+        # Ensure inputs is a list
         if isinstance(inputs, str):
             inputs = [inputs]
+        # Tokenize inputs
         encoded_inputs = self.tokenizer(inputs, padding=True, truncation=True, max_length=128, return_tensors="pt")
+        # Move inputs to the correct device
         input_ids = encoded_inputs['input_ids'].to(self.device)
         attention_mask = encoded_inputs['attention_mask'].to(self.device)
+        # Perform inference
         with torch.no_grad():
             outputs = self.model(input_ids, attention_mask=attention_mask)
             logits = outputs.logits
             probabilities = torch.nn.functional.softmax(logits, dim=-1)
             predictions = torch.argmax(probabilities, dim=-1)
+        # Convert predictions to human-readable labels
         class_names = ["JAILBREAK", "INJECTION", "PHISHING", "SAFE"]
         results = [{"label": class_names[pred], "score": prob[pred].item()} for pred, prob in zip(predictions, probabilities)]
+        return {"predictions": results}