Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import sklearn | |
from sklearn.model_selection import train_test_split | |
from sklearn.feature_extraction.text import CountVectorizer | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn.tree import DecisionTreeClassifier | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.svm import SVC | |
from sklearn.metrics import accuracy_score | |
from fastapi import FastAPI | |
from fastapi.responses import JSONResponse | |
import threading | |
# Load the labelled messages; the CSV must provide the "Message" and
# "Category" columns that are read below.
df = pd.read_csv(r"spam.csv")

# Page heading for the Streamlit UI
st.title("Identifying Spam and Ham Emails")

# Features are the raw message texts, targets are their category labels
x, y = df["Message"], df["Category"]

# Bag of Words (BoW): one row per message, one column per vocabulary term,
# with English stop words removed by the vectorizer.
bow = CountVectorizer(stop_words="english")
word_counts = bow.fit_transform(x)
vocabulary = bow.get_feature_names_out()
final_data = pd.DataFrame(word_counts.toarray(), columns=vocabulary)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from one run to the next.
x_train, x_test, y_train, y_test = train_test_split(
    final_data,
    y,
    test_size=0.2,
    random_state=20,
)
# Candidate classifiers, keyed by the label shown in the drop-down
models = {
    "Naive Bayes": MultinomialNB(),
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "SVM": SVC(),
}

# Model selection
model_choice = st.selectbox("Choose a Classification Algorithm", list(models))


@st.cache_resource
def _train_and_score(model_name):
    """Fit the named classifier on the training split and score it on the test split.

    Streamlit re-executes this script on every widget interaction, so without
    caching the selected model would be refit and re-evaluated each time a
    button is pressed or the text input changes.  ``st.cache_resource`` keeps
    the result per ``model_name`` and returns it on later reruns.

    NOTE(review): the cache key is only the model name, so a cached model is
    not refreshed if ``spam.csv`` changes while the app is running -- restart
    the app or clear the cache in that case.

    Args:
        model_name: A key of ``models``.

    Returns:
        A ``(fitted_model, test_predictions, accuracy)`` tuple, where
        ``accuracy`` is ``accuracy_score`` of the predictions on ``x_test``
        against ``y_test``.
    """
    model = models[model_name]
    model.fit(x_train, y_train)
    predictions = model.predict(x_test)
    return model, predictions, accuracy_score(y_test, predictions)


# Train the selected model (or reuse the cached result from an earlier rerun)
obj, y_pred, accuracy = _train_and_score(model_choice)

# Display accuracy
if st.button("Show Accuracy"):
    st.write(f"Accuracy of {model_choice}: {accuracy:.4f}")
# Email input and prediction function
email_input = st.text_input("Enter an Email for Prediction")


def predict_email(email):
    """Classify one email text with the selected model and display the result.

    Args:
        email: The raw email text to classify.

    The text is vectorized with the fitted ``bow`` vectorizer, passed to
    ``obj.predict`` and the predicted label is written to the Streamlit page.
    Nothing is returned.
    """
    counts = bow.transform([email]).toarray()
    # The model is fitted on a DataFrame with named columns; predicting on a
    # bare ndarray makes scikit-learn warn that X has no valid feature names,
    # so the same column names are attached here.
    data = pd.DataFrame(counts, columns=bow.get_feature_names_out())
    prediction = obj.predict(data)[0]
    st.write(f"Prediction: {prediction}")
# Classify only when the button was pressed during this script run
if st.button("Predict Email"):
    # Whitespace-only text contains no words to classify, so it is treated
    # the same way as an empty field.
    if email_input and email_input.strip():
        predict_email(email_input)
    else:
        st.write(":red[Please enter an email to classify]")
# FastAPI app to handle GET requests
app = FastAPI()


# Register the handler on the app; without this decorator the function is
# never exposed and GET /predict/ answers 404.
@app.get("/predict/")
def predict_spam(email: str):
    """Predict whether the email is Spam or Ham.

    Query parameter:
        email (str): The email text to be classified.

    Returns:
        A ``JSONResponse`` whose body is ``{"prediction": <label>}``, where
        the label is whatever ``obj.predict`` yields for the text.
    """
    counts = bow.transform([email]).toarray()
    # The model is fitted on a DataFrame with named columns; attaching the
    # same names avoids scikit-learn's "no valid feature names" warning.
    data = pd.DataFrame(counts, columns=bow.get_feature_names_out())
    prediction = obj.predict(data)[0]
    # NumPy scalar types (e.g. numeric labels) are not JSON serializable;
    # convert them to the equivalent built-in Python value first.
    if isinstance(prediction, np.generic):
        prediction = prediction.item()
    return JSONResponse(content={"prediction": prediction})
# Thread target that serves the FastAPI app, so the API can work alongside Streamlit
def run_api():
    """Serve ``app`` with uvicorn on host 0.0.0.0, port 8000."""
    # Imported inside the function, so uvicorn is only loaded when the
    # server is actually started.
    from uvicorn import run as serve

    serve(app, port=8000, host="0.0.0.0")
# Start FastAPI in a separate thread, but only if no server thread is alive.
# Streamlit re-executes this script on every user interaction; starting a new
# server thread each time would try to bind port 8000 again while the first
# server still holds it.
API_THREAD_NAME = "spam-api-server"
api_thread = next(
    # threading.enumerate() lists only threads that are currently alive, so a
    # server thread that has died is replaced on the next rerun.
    (t for t in threading.enumerate() if t.name == API_THREAD_NAME),
    None,
)
if api_thread is None:
    api_thread = threading.Thread(
        target=run_api,
        name=API_THREAD_NAME,
        daemon=True,
    )
    api_thread.start()
# NOTE(review): the running server keeps the `run_api` target from the script
# run that started it -- presumably later model selections made in the UI are
# not reflected by the API; confirm.
# You can also check API response using the link below:
# http://localhost:8000/predict/?email=Your_email_text_here