menimeni123
/

helem-llm

Joblib

Safetensors

distilbert

Inference Endpoints

Model card Files Files and versions Community

menimeni123 committed on Sep 12, 2024

Commit

b36a521

1 Parent(s): 1ad94a4

latest

Browse files

Files changed (1) hide show

handler.py → app.py +36 -30

handler.py → app.py RENAMED Viewed

@@ -1,49 +1,55 @@
-import os
 import torch
-from transformers import BertTokenizer
-from joblib import load
 import torch.nn.functional as F
-# Load tokenizer (ensure it's the same one used in training)
-tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-# Load the model (ensure the same model that was trained)
-current_dir = os.path.dirname(os.path.abspath(__file__))
-model_path = os.path.join(current_dir, "model.joblib")
-print(f"Loading model from: {model_path}")
-model = load(model_path)
-# Set the model to evaluation mode
 model.eval()
-# Define class names (same as used during training)
-class_names = ["JAILBREAK", "INJECTION", "PHISHING", "SAFE"]
-# Inference function that will be called by Hugging Face Inference API
 def classify_text(text):
     encoding = tokenizer(str(text), truncation=True, padding=True, max_length=128, return_tensors='pt')
-    input_ids = encoding['input_ids']
-    attention_mask = encoding['attention_mask']
-    # Move tensors to device if needed
-    if torch.cuda.is_available():
-        input_ids = input_ids.cuda()
-        attention_mask = attention_mask.cuda()
-        model.cuda()
     with torch.no_grad():
         outputs = model(input_ids, attention_mask=attention_mask)
         logits = outputs.logits
         probabilities = F.softmax(logits, dim=-1)
         confidence, predicted_class = torch.max(probabilities, dim=-1)
     predicted_label = class_names[predicted_class.item()]
     confidence_score = confidence.item()
-    return {"label": predicted_label, "confidence": confidence_score}
-# The inference API calls this method with a JSON request.
-def inference(inputs):
-    text = inputs["inputs"]
-    return classify_text(text)

 import torch
+import joblib
+from flask import Flask, request, jsonify
+from transformers import BertTokenizer, BertForSequenceClassification
 import torch.nn.functional as F
+# Initialize Flask application
+app = Flask(__name__)
+# Load model and tokenizer
+model = joblib.load('model.joblib')
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
 model.eval()
+# Set device to CUDA if available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+# Inference function
 def classify_text(text):
     encoding = tokenizer(str(text), truncation=True, padding=True, max_length=128, return_tensors='pt')
+    input_ids = encoding['input_ids'].to(device)
+    attention_mask = encoding['attention_mask'].to(device)
     with torch.no_grad():
         outputs = model(input_ids, attention_mask=attention_mask)
         logits = outputs.logits
         probabilities = F.softmax(logits, dim=-1)
         confidence, predicted_class = torch.max(probabilities, dim=-1)
+    class_names = ["JAILBREAK", "INJECTION", "PHISHING", "SAFE"]
     predicted_label = class_names[predicted_class.item()]
     confidence_score = confidence.item()
+    return predicted_label, confidence_score
+# Define the inference route
+@app.route('/inference', methods=['POST'])
+def inference():
+    data = request.json
+    if 'text' not in data:
+        return jsonify({"error": "No text provided"}), 400
+    text = data['text']
+    label, confidence = classify_text(text)
+    return jsonify({
+        'text': text,
+        'classification': label,
+        'confidence': confidence
+    })
+# Start the Flask server
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=8080)