import torch import joblib from flask import Flask, request, jsonify from transformers import BertTokenizer, BertForSequenceClassification import torch.nn.functional as F # Initialize Flask application app = Flask(__name__) # Load model and tokenizer model = joblib.load('model.joblib') tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') model.eval() # Set device to CUDA if available device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model.to(device) # Inference function def classify_text(text): encoding = tokenizer(str(text), truncation=True, padding=True, max_length=128, return_tensors='pt') input_ids = encoding['input_ids'].to(device) attention_mask = encoding['attention_mask'].to(device) with torch.no_grad(): outputs = model(input_ids, attention_mask=attention_mask) logits = outputs.logits probabilities = F.softmax(logits, dim=-1) confidence, predicted_class = torch.max(probabilities, dim=-1) class_names = ["JAILBREAK", "INJECTION", "PHISHING", "SAFE"] predicted_label = class_names[predicted_class.item()] confidence_score = confidence.item() return predicted_label, confidence_score # Define the inference route @app.route('/inference', methods=['POST']) def inference(): data = request.json if 'text' not in data: return jsonify({"error": "No text provided"}), 400 text = data['text'] label, confidence = classify_text(text) return jsonify({ 'text': text, 'classification': label, 'confidence': confidence }) # Start the Flask server if __name__ == '__main__': app.run(host='0.0.0.0', port=8080)