Spaces: Build error
import streamlit as st
import pickle
import nltk
import string
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Download the NLTK data the app needs at runtime (tokenizer models and the
# stopword list); these are not preinstalled in a fresh environment such as a Space.
nltk.download('punkt')
nltk.download('stopwords')

# Initialize PorterStemmer
ps = PorterStemmer()

# Load the pre-trained model and TF-IDF vectorizer
with open('model.pkl', 'rb') as f:
    model = pickle.load(f)
with open('vectorized.pkl', 'rb') as f:
    tfidf = pickle.load(f)
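# Note: both model.pkl and vectorized.pkl must be committed to the repository
# next to the app script; if either file is missing, the load above raises
# FileNotFoundError as soon as the app starts.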
# Define the text preprocessing function
def update_text(text):
    text = text.lower()
    text = nltk.word_tokenize(text)

    y = []
    for i in text:
        if i.isalnum():
            y.append(i)

    text = y[:]
    y.clear()
    for i in text:
        if i not in stopwords.words('english') and i not in string.punctuation:
            y.append(i)

    text = y[:]
    y.clear()
    for i in text:
        y.append(ps.stem(i))

    return " ".join(y)
# Streamlit application title
st.title("Email/SMS Spam Classifier")

# Collecting the SMS text with a larger text area
input_sms = st.text_area("Write the Message", height=150)

# Add a button to trigger the prediction
if st.button("Predict"):
    # Preprocessing the text
    transformed_sms = update_text(input_sms)

    # Ensure the transformed SMS is not empty before vectorizing
    if transformed_sms.strip():
        # Vectorizing the SMS
        vectorized_input = tfidf.transform([transformed_sms])
        # Convert the sparse matrix to a dense format
        vectorized_input_dense = vectorized_input.toarray()
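        # If the pickled model is a scikit-learn classifier, most of them accept
        # the sparse matrix directly; the dense conversion is mainly needed for
        # models such as GaussianNB, so it is kept here as a safe default.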
        # Predicting
        try:
            result = model.predict(vectorized_input_dense)[0]
            if result == 1:
                st.header("Spam")
            else:
                st.header("Not Spam")
        except Exception as e:
            st.error(f"Error during prediction: {e}")
    else:
        st.warning("Please enter a valid message.")