File size: 2,341 Bytes
e9ab3e1
 
7e8cccb
e9ab3e1
 
 
 
 
 
 
 
7e8cccb
 
 
e9ab3e1
1d64e70
e9ab3e1
 
1d64e70
e9ab3e1
 
 
1d64e70
e9ab3e1
 
 
1d64e70
e9ab3e1
 
1d64e70
e9ab3e1
 
 
 
 
 
 
 
7e8cccb
 
e9ab3e1
 
 
 
 
7e8cccb
 
e9ab3e1
7e8cccb
 
c01c8de
 
 
 
7e8cccb
 
 
 
 
 
 
 
e9ab3e1
7e8cccb
 
 
 
 
 
1d64e70
7e8cccb
 
643bbf7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pandas as pd
import numpy as np
from flask import Flask, request, jsonify
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Initialize Flask app
app = Flask(__name__)

# Read dataset. Only the "Message" and "Category" columns are used below.
df = pd.read_csv(r"spam.csv")

# Define feature and target variables
x = df["Message"]
y = df["Category"]

# Create a Bag of Words (BoW) model.
# The document-term matrix is kept in the sparse form that CountVectorizer
# returns rather than being expanded into a dense DataFrame: a dense copy
# costs memory proportional to (documents x vocabulary size) and is almost
# entirely zeros. Fitting on a plain matrix (no column names) also keeps the
# model consistent with the /predict endpoint, which passes a bare array;
# fitting on a named DataFrame makes scikit-learn warn about missing feature
# names on every prediction.
# NOTE(review): the vocabulary is learned from the full corpus before the
# split below, so words that occur only in the test rows still get a column.
# Fit the vectorizer on the training rows only if an unbiased accuracy
# estimate matters.
bow = CountVectorizer(stop_words="english")
final_data = bow.fit_transform(x)

# Train-test split (80/20, fixed seed so the reported accuracy is reproducible)
x_train, x_test, y_train, y_test = train_test_split(
    final_data, y, test_size=0.2, random_state=20
)

# Initialize models. Only the one named by `model_choice` is trained.
models = {
    "Naive Bayes": MultinomialNB(),
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "SVM": SVC(),
}

# Choose and train a model
model_choice = "Naive Bayes"  # Default model
obj = models[model_choice]
obj.fit(x_train, y_train)
y_pred = obj.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)

# Print accuracy for initial check
print(f"Accuracy of {model_choice}: {accuracy:.4f}")

@app.route('/predict', methods=['GET'])
def predict_spam():
    """Classify the text supplied in the ``email`` query parameter.

    Returns:
        A JSON object with a ``prediction`` key holding the label produced
        by the trained model, or an ``error`` message with HTTP status 400
        when the parameter is missing or empty.
    """
    text = request.args.get('email')

    # Guard clause: a missing or empty parameter is a client error.
    if not text:
        return jsonify({"error": "Please provide an 'email' query parameter."}), 400

    # Vectorize with the already-fitted vocabulary, then classify the single row.
    features = bow.transform([text]).toarray()
    label = obj.predict(features)[0]
    return jsonify({"prediction": label})

@app.route('/accuracy', methods=['GET'])
def get_accuracy():
    """Report the accuracy measured on the held-out test split at startup.

    Returns:
        A JSON object with a single ``accuracy`` key.
    """
    payload = {"accuracy": accuracy}
    return jsonify(payload)

# Start the development server only when this file is executed directly.
# debug=True turns on Flask's debug mode, which is meant for local
# development only.
if __name__ == "__main__":
    app.run(
        host='127.0.0.1',
        port=5001,
        debug=True,
    )