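"""Streamlit app that classifies a user-supplied message as spam or non-spam.

On every "Check" click the app loads mail_data.csv, fits a tokenizer, trains a
small Keras embedding + dense network on the labelled messages, and then runs
the entered message through the freshly trained model.
"""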
import streamlit as st
import pandas as pd
from keras import Sequential
from keras.layers import Dense, Embedding, Flatten
from keras.utils import pad_sequences
from keras.preprocessing.text import Tokenizer
st.title("Spam-NonSpam Detector")
Input = st.text_area("Input the message below")
if st.button("Check"):
    st.text("Process may take up to a minute. Please be patient. Thank you!")

    # Load the labelled mail dataset and encode the labels: spam -> 0, ham -> 1.
    df = pd.read_csv("mail_data.csv")
    df.loc[df['Category'] == 'spam', 'Category'] = 0
    df.loc[df['Category'] == 'ham', 'Category'] = 1
    X = df['Message']
    Y = df['Category']

    # Tokenize the messages and pad every sequence to a fixed length of 61 tokens.
    tokenizer = Tokenizer()
    docs = X.astype("string")
    tokenizer.fit_on_texts(docs)
    sequences = tokenizer.texts_to_sequences(docs)
    sequences = pad_sequences(sequences, padding='post', maxlen=61)
    voc_size = len(tokenizer.word_index)

    # Small feed-forward classifier on top of a learned 2-dimensional embedding.
    model = Sequential()
    model.add(Embedding(voc_size + 1, 2, input_length=61))
    model.add(Flatten())
    model.add(Dense(5, activation="relu"))
    model.add(Dense(5, activation="relu"))
    model.add(Dense(1, activation='sigmoid'))

    # Prepare the training arrays and fit the model (retrained on every click).
    X = sequences
    Y = Y.to_numpy()
    Y = Y.astype("int")
    Y = Y.reshape(-1, 1)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
    model.fit(X, Y, epochs=21)

    # Vectorize the user's message the same way and run it through the classifier.
    Input = [Input]
    seq = tokenizer.texts_to_sequences(Input)
    inp = pad_sequences(seq, padding='post', maxlen=61)
    a = model.predict(inp)
    value = a[0][0]

    st.text("Input:")
    st.markdown(Input[0])
    st.text("Output:")
    # The sigmoid output is the probability of "ham" (label 1), so > 0.5 means non-spam.
    if value > 0.5:
        st.text('Non-spam message')
    else:
        st.text('Spam message')
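
# Sketch of a possible refactor (not part of the original app): the model above is
# retrained on every "Check" click. Assuming a Streamlit version that provides
# st.cache_resource (1.18+), the training steps could be wrapped in a cached helper
# such as the hypothetical train_spam_model() below, so the tokenizer and model are
# built only once per server process:
#
#     @st.cache_resource
#     def train_spam_model(csv_path="mail_data.csv"):
#         # ... same preprocessing, model definition and model.fit as above ...
#         return tokenizer, model
#
#     tokenizer, model = train_spam_model()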