# app.py (1.51 kB) -- copied from the repository file view.
# Last commit: "Update app.py" by Sameer (690bfee).
import streamlit as st
import pandas as pd
from keras import Sequential
from keras.layers import Dense,Embedding
from keras.utils import pad_sequences
from keras.preprocessing.text import Tokenizer
# Every message is padded (or truncated) to this many tokens, both when
# training and when classifying the user's input.
MAX_SEQUENCE_LENGTH = 61

# The network ends in a sigmoid, so its output is a score in (0, 1).
# Training labels are spam = 0 and ham = 1, hence scores below this
# threshold are reported as spam.
HAM_THRESHOLD = 0.5


def _train_spam_model(csv_path):
    """Train the spam classifier from the CSV file at *csv_path*.

    The file is read with pandas and must provide a ``Message`` column
    (the text) and a ``Category`` column whose values are ``'spam'`` or
    ``'ham'``.

    Returns:
        A ``(tokenizer, model)`` tuple: the fitted Keras ``Tokenizer`` and
        the trained ``Sequential`` model. The same tokenizer must be used
        to encode any text passed to the model later.
    """
    # Local import: Flatten is not part of the file-level import list.
    from keras.layers import Flatten

    df = pd.read_csv(csv_path)
    messages = df['Message'].astype("string")
    # spam -> 0, ham -> 1. Any other label becomes NaN and makes the
    # integer conversion below fail loudly instead of training on garbage.
    labels = df['Category'].map({'spam': 0, 'ham': 1})
    labels = labels.to_numpy().astype("int").reshape(-1, 1)

    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(messages)
    sequences = tokenizer.texts_to_sequences(messages)
    sequences = pad_sequences(
        sequences, padding='post', maxlen=MAX_SEQUENCE_LENGTH
    )
    voc_size = len(tokenizer.word_index)

    model = Sequential()
    # +1 because index 0 is reserved for padding.
    model.add(Embedding(voc_size + 1, 2, input_length=MAX_SEQUENCE_LENGTH))
    # Collapse the per-token embeddings into one vector per message so the
    # final layer yields a single score per message, matching the shape of
    # the labels.
    model.add(Flatten())
    model.add(Dense(5, activation="relu"))
    model.add(Dense(5, activation="relu"))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(
        optimizer='adam', loss='binary_crossentropy', metrics=['acc']
    )
    model.fit(sequences, labels, epochs=21)
    return tokenizer, model


st.title("Spam-NonSpam Detector")
Input = st.text_input("Input", "Write here...")
if st.button("Check"):
    st.text("Process may take upto a minute. Please be patient. Thank you!")
    # NOTE(review): the model is retrained from scratch on every click;
    # consider caching the trained model if start-up time matters.
    tokenizer, model = _train_spam_model("mail_data.csv")

    # Encode the user's text exactly like the training data: same
    # tokenizer, same padding side, same length.
    InputDataFeatures = pad_sequences(
        tokenizer.texts_to_sequences([Input]),
        padding='post',
        maxlen=MAX_SEQUENCE_LENGTH,
    )
    prediction = model.predict(InputDataFeatures)
    # predict() returns one row per input with a single sigmoid score.
    ham_score = float(prediction[0][0])

    st.text("Input:")
    st.markdown(Input)
    st.text("Output:")
    if ham_score < HAM_THRESHOLD:
        st.text("Spam")
    else:
        st.text("Not Spam")