# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gradio as gr

# import string
import string

# import countvectorizer
from sklearn.feature_extraction.text import CountVectorizer

# import train_test_split
from sklearn.model_selection import train_test_split

# import multinomial naive bayes
from sklearn.naive_bayes import MultinomialNB

# load the tab-separated dataset; the column names are supplied explicitly
# through `names`, so pandas reads the first line of the file as data
df = pd.read_csv(
    'dataset/spam.tsv',
    names=['label', 'message'],
    sep='\t',
)

# presumably the two values that occur in the 'label' column - verify against
# the data; nothing in the visible code reads this list
features = ['spam', 'ham']

# translation table that deletes every character listed in string.punctuation;
# built once so each call is a single pass over the text
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


# remove punctuation characters from a message
def remove_punctuation(text):
    """Return *text* with every ``string.punctuation`` character removed.

    Only the ASCII punctuation characters listed in ``string.punctuation``
    are stripped; letters, digits, whitespace and any other characters are
    kept in their original order.

    Args:
        text: the message to clean, as a ``str``.

    Returns:
        A new string without punctuation (empty if *text* is empty or
        consists only of punctuation).
    """
    return text.translate(_PUNCTUATION_TABLE)

# strip punctuation from every message before any features are derived
df['message'] = df['message'].map(remove_punctuation)

# store the character count of each cleaned message in a 'length' column
df['length'] = df['message'].map(len)

# messages are the model input, labels are the prediction target
X = df['message'].values
y = df['label'].values

# hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# word-count vectorizer that drops English stop words
CV = CountVectorizer(stop_words='english')

# learn the vocabulary from the training messages and turn them into counts
X_train_CV = CV.fit_transform(X_train)

# convert the test messages with that same vocabulary (transform only, no re-fit)
X_test_CV = CV.transform(X_test)

# create the multinomial naive Bayes classifier and train it on the training counts
NB = MultinomialNB().fit(X_train_CV, y_train)

# predict labels for the held-out messages
# NOTE(review): y_pred is never compared with y_test in the visible code, so no
# accuracy figure is actually produced here
y_pred = NB.predict(X_test_CV)

# classify a single message as spam or ham
def predict_spam(message):
    """Classify one message and return a human-readable verdict.

    The message is cleaned with ``remove_punctuation`` before it is
    vectorized, because the vectorizer's vocabulary was learned from messages
    that had already been stripped of punctuation. Without this step a word
    such as "don't" would be tokenized differently at prediction time than it
    was during training.

    Args:
        message: the raw message text to classify.

    Returns:
        'This is a ham message' when the classifier predicts the 'ham' label,
        otherwise 'This is a spam message'.
    """
    cleaned = remove_punctuation(message)
    vectorized = CV.transform([cleaned])
    # predict() returns one label per input row; a single message was passed,
    # so take the first (only) element instead of comparing the whole array
    label = NB.predict(vectorized)[0]
    if label == 'ham':
        return 'This is a ham message'
    return 'This is a spam message'

# build the web UI: one text box in, one text box out, wired to predict_spam
# NOTE(review): the output box's info text contains the typo "enetered"; it is a
# user-facing string and is left exactly as it was
iface = gr.Interface(
    title="Spam Classifier",
    description="Enter a message to check if it is spam or ham",
    fn=predict_spam,
    inputs=gr.Textbox(
        label="Message",
        info="Enter a message",
        placeholder="Enter a message to check if it is spam or ham",
        lines=2,
    ),
    outputs=gr.Textbox(
        label="Prediction",
        info="Check if the enetered message is spam or ham",
        placeholder="Output will be here..",
        lines=2,
    ),
    examples=[
        ['Hey, how are you doing?'],
        ['Congratulations! You have won a free trip to Dubai!'],
    ],
    allow_flagging='never',
)

# start the app; this call sits at module level, so it runs whenever the file is executed or imported
iface.launch()