# R0obin's picture
# Create app.py
# 721f12c verified
import streamlit as st
import pickle
import nltk
import string
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Initialize PorterStemmer
ps = PorterStemmer()


def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at *path*.

    Raises whatever ``open`` or ``pickle.load`` raise (for example
    ``FileNotFoundError`` when the file is missing).
    """
    # NOTE: pickle.load can execute arbitrary code while deserializing, so
    # only artifacts from a trusted source may be placed at these paths.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Streamlit re-executes this script on every widget interaction. When the
# installed Streamlit provides st.cache_resource, wrap the loader so each file
# is deserialized once and then reused; on older versions fall back to the
# plain loader, which reloads on every run as before.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    # show_spinner=False keeps the page free of an extra "Running..." notice.
    _load_pickle = _cache_resource(show_spinner=False)(_load_pickle)

# Load the pre-trained model and TF-IDF vectorizer
model = _load_pickle('model.pkl')
tfidf = _load_pickle('vectorized.pkl')
# Define the text preprocessing function
def update_text(text):
    """Normalize a raw message into a space-joined string of stemmed tokens.

    The message is lowercased and tokenized with ``nltk.word_tokenize``.
    Tokens are kept only if ``str.isalnum()`` is true for them and they are
    not in NLTK's English stop-word list; each kept token is then stemmed
    with the module-level Porter stemmer ``ps``.

    Args:
        text: The raw message string.

    Returns:
        The stemmed tokens joined by single spaces, in their original order;
        an empty string when no token survives the filters.
    """
    # Build the stop-word set once per call. Evaluating
    # stopwords.words('english') inside the loop repeats the lookup and a
    # linear list scan for every single token.
    stop_words = set(stopwords.words('english'))

    tokens = nltk.word_tokenize(text.lower())

    # A token that passes isalnum() is non-empty and purely alphanumeric, so
    # it can never be a substring of string.punctuation; a separate
    # punctuation test would therefore filter nothing further.
    kept = [tok for tok in tokens if tok.isalnum() and tok not in stop_words]

    return " ".join(ps.stem(tok) for tok in kept)
# Page heading
st.title("Email/SMS Spam Classifier")
# Multi-line input box for the message to classify
input_sms = st.text_area("Write the Message", height=150)
# Everything below runs only when the "Predict" button reports a click
if st.button("Predict"):
    # Preprocess the raw message with update_text
    cleaned_message = update_text(input_sms)
    if not cleaned_message.strip():
        # Nothing survived preprocessing, so there is nothing to vectorize
        st.warning("Please enter a valid message.")
    else:
        # Vectorize the single message and convert the sparse result to a
        # dense array before handing it to the model
        features = tfidf.transform([cleaned_message]).toarray()
        try:
            label = model.predict(features)[0]
            # A predicted label of 1 is reported as spam; anything else is not
            st.header("Spam" if label == 1 else "Not Spam")
        except Exception as err:
            # Surface any prediction failure in the page itself
            st.error(f"Error during prediction: {err}")