latest

Browse files

Files changed (4) hide show

.DS_Store +0 -0
endpoint.py +0 -28
handler.py +22 -25
requirements.txt +3 -3

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

endpoint.py DELETED Viewed

@@ -1,28 +0,0 @@
-from huggingface_hub import InferenceClient, create_inference_endpoint
-# Create the inference endpoint
-endpoint = create_inference_endpoint(
-    name="my-custom-endpoint",
-    repository="path/to/your/model/repository",
-    framework="custom",
-    task="text-classification",
-    accelerator="cpu",  # or "gpu" if needed
-    instance_size="medium",
-    instance_type="c6i",
-    region="us-east-1",
-    custom_image={
-        "health_route": "/healthz",
-        "port": 8080,
-        "url": "your-docker-image-url:latest"
-    }
-)
-# Wait for the endpoint to be ready
-endpoint.wait()
-# Create a client to interact with the endpoint
-client = InferenceClient(endpoint.url)
-# Test the endpoint
-result = client.text_classification("This is a test input")
-print(result)

handler.py CHANGED Viewed

@@ -1,38 +1,35 @@
-import os
-import torch
 from joblib import load
 from transformers import BertTokenizer
 class EndpointHandler:
     def __init__(self, path=""):
-        self.model = load(os.path.join(path, "model.joblib"))
         self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.model.to(self.device)
-    def __call__(self, data):
-        inputs = data.pop("inputs", data)
-        # Ensure inputs is a list
-        if isinstance(inputs, str):
-            inputs = [inputs]
-        # Tokenize inputs
-        encoded_inputs = self.tokenizer(inputs, padding=True, truncation=True, max_length=128, return_tensors="pt")
-        # Move inputs to the correct device
-        input_ids = encoded_inputs['input_ids'].to(self.device)
-        attention_mask = encoded_inputs['attention_mask'].to(self.device)
         # Perform inference
         with torch.no_grad():
             outputs = self.model(input_ids, attention_mask=attention_mask)
             logits = outputs.logits
-            probabilities = torch.nn.functional.softmax(logits, dim=-1)
-            predictions = torch.argmax(probabilities, dim=-1)
-        # Convert predictions to human-readable labels
         class_names = ["JAILBREAK", "INJECTION", "PHISHING", "SAFE"]
-        results = [{"label": class_names[pred], "score": prob[pred].item()} for pred, prob in zip(predictions, probabilities)]
-        return {"predictions": results}

 from joblib import load
 from transformers import BertTokenizer
+import torch
+import torch.nn.functional as F
+from typing import Dict, Any
 class EndpointHandler:
     def __init__(self, path=""):
+        # Load the model
+        self.model = load(f"{path}/model.joblib")
+        self.model.eval()
+        # Load the tokenizer
         self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        # Extract input text
+        text = data.get("inputs", "")
+        # Tokenize the input text
+        encoding = self.tokenizer(text, truncation=True, padding=True, max_length=128, return_tensors='pt')
+        input_ids = encoding['input_ids']
+        attention_mask = encoding['attention_mask']
         # Perform inference
         with torch.no_grad():
             outputs = self.model(input_ids, attention_mask=attention_mask)
             logits = outputs.logits
+            probabilities = F.softmax(logits, dim=-1)
+            confidence, predicted_class = torch.max(probabilities, dim=-1)
+        # Map predicted class to label
         class_names = ["JAILBREAK", "INJECTION", "PHISHING", "SAFE"]
+        predicted_label = class_names[predicted_class.item()]
+        confidence_score = confidence.item()
+        return {"label": predicted_label, "confidence": confidence_score}

requirements.txt CHANGED Viewed

@@ -1,3 +1,3 @@
-torch==1.9.0
-transformers==4.44.2
-joblib==1.1.0

+torch
+transformers
+joblib