import streamlit as st
import pandas as pd
import numpy as np
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import contractions
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Download the NLTK resources used below: the stop-word corpus, the sentence /
# word tokenizer data, and the WordNet data used by the lemmatizer.
# NOTE: 'punkt_tab' is included because recent NLTK releases (3.9+) make
# word_tokenize load its data from 'punkt_tab' instead of the pickled 'punkt'
# package; without it tokenisation raises LookupError on a fresh environment.
# 'punkt' is kept so older NLTK versions keep working.
for _nltk_resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(_nltk_resource)

# Load the trained Keras model once and reuse it.
# Streamlit re-executes this script from the top on every user interaction, so
# an unguarded load_model() call would re-read the model file each time.
# st.cache_resource keeps a single loaded instance for subsequent reruns.
@st.cache_resource
def _load_sentiment_model():
    """Return the Keras model stored in 'best_model.keras'."""
    return load_model('best_model.keras')


model = _load_sentiment_model()

# Lemmatizer shared by every call to text_preprocessing.
lemmatizer = WordNetLemmatizer()

# Stop-word list: the (deduplicated) NLTK English stop words followed by the
# extra words 'hotel' and 'room'.
# NOTE(review): the extra words are presumably too frequent in the review data
# to carry sentiment -- confirm against the training notebook.
stopwords_add = ['hotel', 'room']
stopwords_nltk = list(set(stopwords.words('english')))
stopwords_all = [*stopwords_nltk, *stopwords_add]

# Keras tokenizer instance.
# NOTE: it is created empty and is never fitted in this file; before it can be
# used it has to be fitted on the training texts, e.g.
#     tokenizer.fit_on_texts(training_texts)
tokenizer = Tokenizer()

# Text pre-processing applied to a review before it is passed to the model
def text_preprocessing(document):
    """Clean a raw review string and return it as space-joined lemmas.

    Steps, in order:
      1. lowercase the text and run it through ``contractions.fix``;
      2. delete every ``string.punctuation`` character (nothing is inserted
         in its place, so words separated only by punctuation are merged);
      3. delete every word that contains a digit;
      4. delete every non-ASCII character;
      5. replace every word of one to three word characters with a space;
      6. tokenize, drop tokens found in ``stopwords_all`` and lemmatize the
         remaining tokens.

    Args:
        document: The review text (a ``str``).

    Returns:
        The processed text; an empty string when no token survives.
    """
    cleaned = contractions.fix(document.lower())

    # Regex passes are order-sensitive: each one runs on the previous result.
    substitutions = (
        (f'[{re.escape(string.punctuation)}]', ''),  # punctuation
        (r'\w*\d\w*', ''),                           # words containing digits
        ('[^\x00-\x7f]', ''),                        # non-ASCII characters
        (r'\b\w{1,3}\b', ' '),                       # words of 1-3 characters
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    # Tokenize the trimmed text, skip stop words, lemmatize what is left.
    lemmas = []
    for token in word_tokenize(cleaned.strip()):
        if token in stopwords_all:
            continue
        lemmas.append(lemmatizer.lemmatize(token))

    return ' '.join(lemmas)

def run():
    """Render the sentiment page and show a prediction for a submitted review.

    The page shows a form with a text area and a submit button. Nothing is
    computed until the form is submitted. On submit the review is cleaned
    with ``text_preprocessing``; if nothing is left after cleaning a notice
    is shown, otherwise the text is passed to ``model`` and the class with
    the highest score is reported (0 = negative, 1 = neutral, anything else
    = positive).
    """
    st.title("Analisis Sentimen Review")

    st.subheader("Prediksi")

    # Input form. The prompt is shown as a placeholder rather than as the
    # field's value, so submitting an untouched form yields an empty review
    # instead of classifying the prompt text itself.
    with st.form("Analisis Review"):
        review = st.text_area(
            "Masukkan review:",
            value="",
            placeholder="Masukkan review di sini",
            height=200,
        )
        submitted = st.form_submit_button('Prediksi')

    # Skip pre-processing and inference on reruns where the form was not
    # submitted.
    if not submitted:
        return

    review_processed = text_preprocessing(review)

    # Nothing survived cleaning: tell the user instead of predicting on "".
    if not review_processed:
        st.write("Review yang dimasukkan terlalu pendek atau hanya berisi stopwords.")
        return

    # One sample as a (1, 1) object array holding the cleaned string.
    # NOTE(review): presumably the saved model starts with a TextVectorization
    # layer that accepts raw strings in this shape -- confirm.
    X = np.array([review_processed], dtype=object).reshape(-1, 1)

    # Predict and reduce the single-sample argmax result to a plain int so
    # the comparisons below do not rely on the truth value of an array.
    y_pred_inf_proba = model.predict(X)
    y_pred_inf = int(np.ravel(np.argmax(y_pred_inf_proba, axis=-1))[0])

    # Report the predicted class.
    if y_pred_inf == 0:
        st.write('Review ini negatif.')
    elif y_pred_inf == 1:
        st.write('Review ini netral.')
    else:
        st.write('Review ini positif.')

# Render the page when this file is executed as a script (not when imported).
if __name__ == "__main__":
    run()