zakyirhab0 committed
Commit 8045cec · verified · 1 Parent(s): b5b95ed

Upload 22 files

app.py ADDED
@@ -0,0 +1,1548 @@
1
+ import streamlit as st
2
+ import joblib
3
+ import pandas as pd
4
+ import re
5
+ import emoji
6
+ import json
7
+ import io
8
+ import unicodedata
9
+ from sklearn.feature_extraction.text import TfidfVectorizer
10
+ from sklearn.metrics.pairwise import cosine_similarity
11
+ from sklearn.model_selection import train_test_split, GridSearchCV
12
+ from sklearn.ensemble import RandomForestClassifier
13
+ from sklearn.metrics import accuracy_score, f1_score
14
+ from imblearn.over_sampling import SMOTE
15
+ import matplotlib.pyplot as plt
16
+ import os
17
+ from wordcloud import WordCloud
18
+ from sklearn.metrics import classification_report
19
+ from sklearn.cluster import KMeans
20
+ from sklearn.decomposition import PCA
21
+ from sklearn.preprocessing import FunctionTransformer
22
+ from sklearn.pipeline import Pipeline
23
+ from transformers import pipeline
24
+ from collections import Counter
25
+ import nltk
26
+ from nltk.corpus import stopwords
27
+ from datetime import datetime
28
+
29
+ # === Preprocessing Functions === #
30
+ candidate_list = ["Lalu Muhamad Iqbal", "Indah Dhamayanti Putri", "Zulkieflimansyah", "M Suhaili", "Sitti Rohmi Djalilah", "Musyafirin"]
31
+
32
+ # Download the stopwords corpus if it is not already available
33
+ nltk.download('stopwords')
34
+ stop_words = set(stopwords.words('indonesian'))
35
+
36
+ # Make sure the "BA Lainnya" data is available
37
+ if 'data_with_ba' in st.session_state:
38
+ ba_lainnya_data = st.session_state['data_with_ba']
39
+ else:
40
+ ba_lainnya_data = None
41
+
42
+ def translate_emojis(text):
43
+ return ''.join(c for c in text if not emoji.is_emoji(c)) # Remove all emojis
44
+
45
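+ # Apply NFKD normalization so compatibility characters are decomposed to their base forms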
+ def normalize_unicode(text):
46
+ return unicodedata.normalize('NFKD', text)
47
+
48
+ def clean_text(text):
49
+ text = str(text).casefold() # Convert to lowercase
50
+ text = re.sub(r'http\S+|www\S+', '', text) # Remove URLs
51
+ text = re.sub(r'[^a-z\s]', '', text) # Remove non-alphabetic characters
52
+ text = re.sub(r'\s+', ' ', text).strip() # Normalize spaces
53
+ return text
54
+
55
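+ # Merge each negation word with the word that follows it into a single token,
+ # e.g. 'tidak bagus' -> 'tidak_bagus', so negated phrases survive tokenization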
+ def handle_negation(text):
56
+ negation_words = {"tidak", "bukan", "jangan", "belum", "kurang", "gagal", "sulit"}
57
+ words = text.split()
58
+ result = []
59
+ skip_next = False
60
+ for i, word in enumerate(words):
61
+ if word in negation_words and i + 1 < len(words):
62
+ result.append(f"{word}_{words[i + 1]}") # Combine negation with next word
63
+ skip_next = True
64
+ elif skip_next:
65
+ skip_next = False
66
+ else:
67
+ result.append(word)
68
+ return ' '.join(result)
69
+
70
+ def handle_replies(text):
71
+ text = re.sub(r'=--*@\w+', '', text) # Remove multi-level reply patterns
72
+ text = re.sub(r'=-*@\w+', '', text) # Remove single-level reply patterns
73
+ text = re.sub(r'@\w+', '', text) # Remove standalone @username mentions
74
+ return text
75
+
76
+ def translate_text(text, dictionary):
77
+ words = text.split()
78
+ return ' '.join([dictionary.get(word.lower(), word) for word in words]) # Translate words using dictionary
79
+
80
+ # Assign a sentiment label based on keyword matches
81
+ def assign_sentiment_based_on_keywords(comment, keyword_dict):
82
+ for sentiment, keywords in keyword_dict.items():
83
+ if any(keyword in comment for keyword in keywords):
84
+ return sentiment
85
+ return 'unknown'
86
+
87
+ # === Load Dictionaries === #
88
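+ # Load a normalization dictionary from JSON (word -> replacement) or from a
+ # two-column CSV of slang -> formal forms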
+ def load_dictionary(file_path, file_type='json'):
89
+ if file_type == 'json':
90
+ with open(file_path, 'r', encoding='utf-8') as file:
91
+ return json.load(file)
92
+ elif file_type == 'csv':
93
+ df = pd.read_csv(file_path, names=['slang', 'formal'])
94
+ return pd.Series(df['formal'].values, index=df['slang']).to_dict()
95
+
96
+ ntb_dict = load_dictionary('/content/ntb_dict.json', 'json')
97
+ slang_dict = load_dictionary('/content/kamusalay.csv', 'csv')
98
+
99
+ # === Utility Functions === #
100
+ # Append an update record to the JSON history file
101
+ def update_history_json(history_path, komentar, link, model_data, field, date):
102
+ # Convert a Timestamp to a string
103
+ if isinstance(date, pd.Timestamp):
104
+ date = date.strftime('%Y-%m-%d')
105
+
106
+ # Read the existing history from the JSON file
107
+ try:
108
+ with open(history_path, 'r') as file:
109
+ history_data = json.load(file)
110
+ except (FileNotFoundError, json.JSONDecodeError):
111
+ history_data = {}
112
+
113
+ # History key for a specific comment, link, and model_data combination
114
+ key = f"{komentar}_{link}_{model_data}"
115
+
116
+ # Append the new record under the matching key
117
+ if key in history_data:
118
+ history_data[key].append({field: date})
119
+ else:
120
+ history_data[key] = [{field: date}]
121
+
122
+ # Write the history back to the JSON file
123
+ with open(history_path, 'w') as file:
124
+ json.dump(history_data, file, indent=4)
125
+
126
+ # Update the training dataset with newly classified data
127
+ def update_training_dataset(output, candidate):
128
+ dataset_path = f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx"
129
+ history_path = f"history_{candidate.lower().replace(' ', '_')}.json"
130
+
131
+ try:
132
+ required_columns = ['model_data', 'Platform', 'komentar', 'link', 'kandidat', 'sentimen', 'tanggal', 'tanggal_masuk']
133
+ output = output[required_columns].copy()
134
+
135
+ if 'predicted_category' in output.columns:
136
+ output['sentimen'] = output['predicted_category']
137
+ output.drop(columns=['predicted_category'], inplace=True)
138
+
139
+ output['tanggal_masuk'] = pd.Timestamp.now()
140
+
141
+ if os.path.exists(dataset_path):
142
+ existing_data = pd.read_excel(dataset_path)
143
+ else:
144
+ existing_data = pd.DataFrame(columns=required_columns)
145
+
146
+ # Add the 'update_ba' column if it does not exist yet
147
+ if 'update_ba' not in existing_data.columns:
148
+ existing_data['update_ba'] = None
149
+
150
+ # Add the 'missing_comment' column if it does not exist yet
151
+ if 'missing_comment' not in existing_data.columns:
152
+ existing_data['missing_comment'] = False
153
+
154
+ # Step 1: check for missing comments
155
+ train_comments = existing_data.groupby('link')['komentar'].apply(list).to_dict()
156
+ new_comments = output.groupby('link')['komentar'].apply(list).to_dict()
157
+
158
+ for link, comments in train_comments.items():
159
+ if link in new_comments:
160
+ new_comment_set = set(new_comments[link])
161
+ for comment in comments:
162
+ if comment not in new_comment_set:
163
+ existing_data.loc[(existing_data['link'] == link) & (existing_data['komentar'] == comment), 'missing_comment'] = True
164
+ else:
165
+ existing_data.loc[(existing_data['link'] == link) & (existing_data['komentar'] == comment), 'missing_comment'] = False
166
+
167
+ # Merge new rows into the existing data and record the history
168
+ def update_data(existing_data, new_data, history_path):
169
+ for index, row in new_data.iterrows():
170
+ komentar = row['komentar']
171
+ link = row['link']
172
+ model_data = row['model_data']
173
+ tanggal_klasifikasi = pd.Timestamp.now()
174
+
175
+ # Case 1: the same comment already exists for this link
176
+ existing_entry = existing_data[(existing_data['link'] == link) & (existing_data['komentar'] == komentar)]
177
+ if not existing_entry.empty:
178
+ existing_data.loc[existing_entry.index, 'update_ba'] = tanggal_klasifikasi
179
+ update_history_json(history_path, komentar, link, model_data, 'update_ba', tanggal_klasifikasi)
180
+ else:
181
+ # Case 2: same link, different comment
182
+ existing_link_entry = existing_data[(existing_data['link'] == link)]
183
+ if not existing_link_entry.empty:
184
+ new_row = row.copy()
185
+ new_row['tanggal_masuk'] = tanggal_klasifikasi
186
+ new_row['update_ba'] = tanggal_klasifikasi
187
+ existing_data = pd.concat([existing_data, new_row.to_frame().T], ignore_index=True)
188
+ update_history_json(history_path, komentar, link, model_data, 'tanggal_masuk', tanggal_klasifikasi)
189
+ update_history_json(history_path, komentar, link, model_data, 'update_ba', tanggal_klasifikasi)
190
+ else:
191
+ # Case 3: new link
192
+ new_row = row.copy()
193
+ new_row['tanggal_masuk'] = tanggal_klasifikasi
194
+ new_row['update_ba'] = tanggal_klasifikasi
195
+ existing_data = pd.concat([existing_data, new_row.to_frame().T], ignore_index=True)
196
+ update_history_json(history_path, komentar, link, model_data, 'tanggal_masuk', tanggal_klasifikasi)
197
+ update_history_json(history_path, komentar, link, model_data, 'update_ba', tanggal_klasifikasi)
198
+
199
+ # Replace missing update_ba values with tanggal_masuk
200
+ existing_data['update_ba'] = pd.to_datetime(existing_data['update_ba'], errors='coerce')
201
+ existing_data['update_ba'].fillna(existing_data['tanggal_masuk'], inplace=True)
202
+ return existing_data
203
+
204
+ updated_data = update_data(existing_data, output, history_path)
205
+ updated_data.to_excel(dataset_path, index=False)
206
+
207
+ st.success(f"Data successfully updated in {candidate}'s training dataset.")
208
+
209
+ if 'missing_comment' in existing_data.columns and existing_data['missing_comment'].any():
210
+ st.subheader("Missing Comments")
211
+ st.write("Comments that were found to be missing:")
212
+ st.dataframe(existing_data[existing_data['missing_comment']])
213
+ except KeyError as e:
214
+ st.error(f"Missing column in the dataset: {e}")
215
+ except Exception as e:
216
+ st.error(f"An error occurred: {e}")
217
+
218
+
219
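+ # Preprocess comments, build TF-IDF vectors, cluster them with KMeans,
+ # and add a 2D PCA projection for visualization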
+ def clustering_based_evaluation(df, n_clusters=10):
220
+ st.write("Starting preprocessing...")
221
+ df['translated_emojis'] = df['komentar'].fillna('').astype(str).apply(translate_emojis)
222
+ df['normalized_unicode'] = df['translated_emojis'].apply(normalize_unicode)
223
+ df['reply_handled'] = df['normalized_unicode'].apply(handle_replies)
224
+ df['clean_text'] = df['reply_handled'].apply(clean_text)
225
+ df['translated_ntb'] = df['clean_text'].apply(lambda x: translate_text(x, ntb_dict))
226
+ df['translated_slang'] = df['translated_ntb'].apply(lambda x: translate_text(x, slang_dict))
227
+ df['negation_handled'] = df['translated_slang'].apply(handle_negation)
228
+
229
+ st.write("Generating TF-IDF vectors...")
230
+ tfidf_vectorizer = TfidfVectorizer(max_features=5000, stop_words=list(stop_words)) # use the Indonesian stopword list loaded above
231
+ tfidf_matrix = tfidf_vectorizer.fit_transform(df['negation_handled'])
232
+
233
+ st.write(f"Clustering into {n_clusters} clusters...")
234
+ kmeans = KMeans(n_clusters=n_clusters, random_state=42)
235
+ df['Cluster'] = kmeans.fit_predict(tfidf_matrix)
236
+
237
+ st.write("Performing PCA for visualization...")
238
+ pca = PCA(n_components=2)
239
+ reduced_data = pca.fit_transform(tfidf_matrix.toarray())
240
+ df['PCA1'] = reduced_data[:, 0]
241
+ df['PCA2'] = reduced_data[:, 1]
242
+
243
+ st.write("Clustering completed successfully!")
244
+ return df
245
+
246
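+ # Load a candidate's training dataset and its JSON history, adding any missing required columns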
+ def load_and_process_data(dataset_path, history_path):
247
+ df = pd.read_excel(dataset_path)
248
+ df['tanggal_masuk'] = pd.to_datetime(df['tanggal_masuk'], errors='coerce')
249
+
250
+ if df['tanggal_masuk'].isnull().any():
251
+ st.warning("Some dates could not be parsed correctly. Please check the date format in the dataset.")
252
+ df['tanggal_masuk'].fillna(pd.Timestamp.now().strftime('%Y-%m-%d'), inplace=True)
253
+
254
+ required_columns = ['model_data', 'Platform', 'komentar', 'sentimen', 'tanggal', 'tanggal_masuk', 'evaluated_by_cluster']
255
+ for col in required_columns:
256
+ if col not in df.columns:
257
+ if col == 'tanggal_masuk':
258
+ df[col] = pd.Timestamp.now().strftime('%Y-%m-%d')
259
+ elif col == 'evaluated_by_cluster':
260
+ df[col] = False
261
+ else:
262
+ df[col] = None
263
+
264
+ df = df[required_columns]
265
+
266
+ try:
267
+ with open(history_path, "r") as f:
268
+ history = json.load(f)
269
+ except FileNotFoundError:
270
+ history = []
271
+
272
+ return df, history
273
+
274
+ # Define the function to handle Special Cluster
275
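+ # For each row: try a keyword match first; if nothing matches, fall back to the
+ # detector pipeline (a 'SARCASM' label maps to 'co sarkastic', anything else to 'Unknown')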
+ def handle_special_cluster(data, keywords, detector):
276
+ for index, row in data.iterrows():
277
+ text = row['negation_handled']
278
+
279
+ # Check whether the text contains any of the keywords
280
+ detected = False
281
+ for sentiment, words in keywords.items():
282
+ if any(word in text for word in words):
283
+ data.loc[index, 'predicted_category'] = sentiment.replace('_', ' ')
284
+ data.loc[index, 'detected_by'] = 'keyword'
285
+ detected = True
286
+ break
287
+
288
+ if not detected:
289
+ # If no keyword matches, fall back to the sarcasm detector
290
+ result = detector(text)
291
+ if result[0]['label'] == 'SARCASM':
292
+ data.loc[index, 'predicted_category'] = 'co sarkastic'
293
+ data.loc[index, 'detected_by'] = 'sarcasm'
294
+ else:
295
+ # No matching sentiment found
296
+ data.loc[index, 'predicted_category'] = 'Unknown'
297
+ data.loc[index, 'detected_by'] = 'unknown'
298
+
299
+ return data
300
+
301
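+ # Lowercase, strip non-alphanumeric characters, split into words, and drop Indonesian stopwords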
+ def preprocess_text(text):
302
+ text = text.lower()
303
+ text = re.sub(r'\W+', ' ', text) # Remove non-alphanumeric characters
304
+ words = text.split()
305
+ words = [word for word in words if word not in stop_words]
306
+ return words
307
+
308
+ def display_word_frequencies(words, num_words):
309
+ st.subheader(f"Top {num_words} Words")
310
+ for word, freq in words:
311
+ st.write(f"{word}: {freq}")
312
+
313
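+ # Note: this helper expects a module-level DataFrame named `df` to exist when it is called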
+ def update_sentiment(index, new_sentimen):
314
+ df.loc[index, 'sentimen'] = new_sentimen
315
+ st.write(f"Updated sentiment for comment at index {index} to {new_sentimen}")
316
+
317
+ # Load the `Corrected Comments` sheet from an Excel file
318
+ def load_corrected_comments_from_excel(file_path):
319
+ try:
320
+ return pd.read_excel(file_path, sheet_name='Corrected Comments')
321
+ except FileNotFoundError:
322
+ return pd.DataFrame(columns=['model_data', 'Platform', 'komentar', 'tanggal', 'link', 'sentimen', 'Cluster_Name', 'corrected_by', 'tanggal_masuk'])
323
+ except Exception as e:
324
+ print(f"Error loading corrected comments: {e}")
325
+ return pd.DataFrame(columns=['model_data', 'Platform', 'komentar', 'tanggal', 'link', 'sentimen', 'Cluster_Name', 'corrected_by', 'tanggal_masuk'])
326
+
327
+ # Save the `Corrected Comments` data to an Excel file
328
+ def save_corrected_comments_to_excel(data, file_path):
329
+ with pd.ExcelWriter(file_path, engine='xlsxwriter') as writer:
330
+ data.to_excel(writer, sheet_name='Corrected Comments', index=False)
331
+
332
+ # Location of the Excel file that stores the `Corrected Comments` data
333
+ corrected_comments_file = 'corrected_comments.xlsx'
334
+
335
+ # Visualize the distribution of comments across clusters
336
+ def display_cluster_visualization(ba_lainnya_data):
337
+ st.subheader("Cluster Visualization")
338
+
339
+ # Count comments in the `Similar Sentiment` and `Special Cluster` categories
340
+ cluster_counts = ba_lainnya_data[ba_lainnya_data['Cluster_Name'].str.contains('Similar|Special Cluster')]['Cluster_Name'].value_counts()
341
+
342
+ # Build the bar chart
343
+ plt.figure(figsize=(10, 6))
344
+ plt.bar(cluster_counts.index, cluster_counts.values, color=['blue', 'green', 'orange', 'red', 'purple'])
345
+ plt.xlabel('Cluster Name')
346
+ plt.ylabel('Number of Comments')
347
+ plt.title('Distribution of Comments in Similar Sentiment and Special Cluster')
348
+ plt.xticks(rotation=45)
349
+ plt.show()
350
+ st.pyplot(plt)
351
+
352
+ def run_clustering_for_ba_lainnya():
353
+ st.title("Clustering for 'BA Lainnya'")
354
+
355
+ if 'data_with_ba' not in st.session_state:
356
+ st.error("No 'BA Lainnya' data found from the classification model. Please classify comments first.")
357
+ st.stop()
358
+
359
+ ba_lainnya_data = st.session_state['data_with_ba']
360
+
361
+ st.write(f"**'BA Lainnya' Data:** {len(ba_lainnya_data)} rows")
362
+
363
+ with open('keywords.json', 'r') as f:
364
+ keyword_dict = json.load(f)
365
+
366
+ selected_candidate = st.session_state['candidate']
367
+ candidate_keywords = keyword_dict.get(selected_candidate.replace(' ', '_'))
368
+
369
+ if candidate_keywords is None:
370
+ st.error("Keywords for the selected candidate not found.")
371
+ st.stop()
372
+
373
+ sarcasm_detector = pipeline('sentiment-analysis', model='unitary/toxic-bert')
374
+
375
+ dataset_path = f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx"
376
+ corrected_comments_file = f"corrected_comments_{selected_candidate.lower().replace(' ', '_')}.xlsx"
377
+
378
+ try:
379
+ train_data = pd.read_excel(dataset_path)
380
+ labeled_data = train_data[train_data['sentimen'].isin(['Co Likes', 'Co Support', 'Co Optimism', 'Co Negative', 'Co Sarkastic'])].copy() # copy so the column assignments below do not hit a slice warning
381
+
382
+ st.write(f"**Labeled Data from Training Dataset:** {len(labeled_data)} rows")
383
+
384
+ ba_lainnya_data['Cluster'] = None
385
+ ba_lainnya_data['detected_by'] = None
386
+ ba_lainnya_data['Cluster_Name'] = None
387
+ ba_lainnya_data['corrected_by'] = "Not Corrected"
388
+ ba_lainnya_data['Sentiment'] = None # initialize to None so no incorrect default value is carried over
389
+
390
+ ba_lainnya_data['Cluster_Name'] = ba_lainnya_data['Cluster_Name'].astype(str)
391
+ ba_lainnya_data['corrected_by'] = ba_lainnya_data['corrected_by'].astype(str)
392
+ ba_lainnya_data['Sentiment'] = ba_lainnya_data['Sentiment'].astype(str)
393
+
394
+ for data in [ba_lainnya_data, labeled_data]:
395
+ data['translated_emojis'] = data['komentar'].fillna('').astype(str).apply(translate_emojis)
396
+ data['normalized_unicode'] = data['translated_emojis'].apply(normalize_unicode)
397
+ data['reply_handled'] = data['normalized_unicode'].apply(handle_replies)
398
+ data['clean_text'] = data['reply_handled'].apply(clean_text)
399
+ data['translated_ntb'] = data['clean_text'].apply(lambda x: translate_text(x, {}))
400
+ data['translated_slang'] = data['translated_ntb'].apply(lambda x: translate_text(x, {}))
401
+ data['negation_handled'] = data['translated_slang'].apply(handle_negation)
402
+ data['negation_handled'] = data['negation_handled'].fillna('')
403
+
404
+ combined_data = ba_lainnya_data.copy()
405
+ combined_data['Label'] = 'BA Lainnya'
406
+
407
+ for sentimen in ['Co Likes', 'Co Support', 'Co Optimism', 'Co Negative', 'Co Sarkastic']:
408
+ sentimen_data = labeled_data[labeled_data['sentimen'] == sentimen].copy()
409
+ sentimen_data['Label'] = sentimen
410
+ combined = pd.concat([combined_data, sentimen_data], ignore_index=True)
411
+
412
+ if len(combined) < 2:
413
+ st.warning(f"Not enough samples to cluster for {sentimen}.")
414
+ continue
415
+
416
+ vectorizer = TfidfVectorizer(ngram_range=(1, 1), max_features=5000)
417
+ tfidf_matrix = vectorizer.fit_transform(combined['negation_handled'])
418
+
419
+ st.write(f"Clustering 'BA Lainnya' comments similar to {sentimen}...")
420
+ kmeans = KMeans(n_clusters=2, random_state=42)
421
+ combined['Cluster'] = kmeans.fit_predict(tfidf_matrix)
422
+
423
+ valid_indices = combined.index[:len(ba_lainnya_data)]
424
+ valid_indices = valid_indices.intersection(ba_lainnya_data.index)
425
+
426
+ ba_lainnya_data.loc[valid_indices, 'Cluster'] = combined.loc[valid_indices, 'Cluster']
427
+ ba_lainnya_data.loc[ba_lainnya_data['Cluster'] == 0, 'Cluster_Name'] = f"{sentimen} Similar"
428
+ ba_lainnya_data.loc[ba_lainnya_data['Cluster'] == 1, 'Cluster_Name'] = f"{sentimen} Dissimilar"
429
+ ba_lainnya_data.loc[valid_indices, 'Sentiment'] = sentimen
430
+
431
+ for index, row in ba_lainnya_data.iterrows():
432
+ if row['Cluster_Name'].endswith('Dissimilar') or row['Cluster_Name'] == 'None':
433
+ dissimilar_comment = ba_lainnya_data.loc[[index]].copy()
434
+ for sentimen in ['Co Likes', 'Co Support', 'Co Optimism', 'Co Negative', 'Co Sarkastic']:
435
+ sentimen_data = labeled_data[labeled_data['sentimen'] == sentimen].copy()
436
+ combined = pd.concat([dissimilar_comment, sentimen_data], ignore_index=True)
437
+
438
+ if len(combined) < 2:
439
+ continue
440
+
441
+ tfidf_matrix = vectorizer.fit_transform(combined['negation_handled'])
442
+ if tfidf_matrix.shape[0] == 0:
443
+ continue
444
+
445
+ kmeans = KMeans(n_clusters=2, random_state=42)
446
+ combined['Cluster'] = kmeans.fit_predict(tfidf_matrix)
447
+
448
+ if len(combined) > 0 and combined.loc[0, 'Cluster'] == 0:
449
+ ba_lainnya_data.loc[index, 'Cluster_Name'] = f"{sentimen} Similar"
450
+ ba_lainnya_data.loc[index, 'Sentiment'] = sentimen
451
+ break
452
+ else:
453
+ ba_lainnya_data.loc[index, 'Cluster_Name'] = 'Special Cluster'
454
+ ba_lainnya_data.loc[index, 'corrected_by'] = 'Special Cluster'
455
+ ba_lainnya_data.loc[index, 'Sentiment'] = 'Special Sentiment'
456
+
457
+ ba_lainnya_data['Cluster_Name'] = ba_lainnya_data['Cluster_Name'].apply(lambda x: 'Special Cluster' if x == 'nan' else x)
458
+
459
+ special_cluster_data = ba_lainnya_data[ba_lainnya_data['Cluster_Name'] == 'Special Cluster']
460
+ if not special_cluster_data.empty:
461
+ special_cluster_data = handle_special_cluster(special_cluster_data, candidate_keywords, sarcasm_detector)
462
+ ba_lainnya_data.update(special_cluster_data)
463
+ ba_lainnya_data.loc[special_cluster_data.index, 'corrected_by'] = 'Special Cluster'
464
+ ba_lainnya_data.loc[special_cluster_data.index, 'Sentiment'] = 'Special Sentiment'
465
+
466
+ st.warning("Some comments were not captured by the current keywords. Please add new keywords in the 'Update Keywords' section.")
467
+
468
+ st.subheader("Detection Distribution in Special Cluster")
469
+ detection_counts = special_cluster_data['detected_by'].value_counts()
470
+ plt.figure(figsize=(10, 6))
471
+ plt.bar(detection_counts.index, detection_counts.values, color=['blue', 'orange', 'red'])
472
+ plt.xlabel('Detection Method')
473
+ plt.ylabel('Number of Comments')
474
+ plt.title('Detection Distribution in Special Cluster')
475
+ plt.show()
476
+ st.pyplot(plt)
477
+
478
+ st.write("Top Keywords in Special Cluster")
479
+ for sentiment, keywords in candidate_keywords.items():
480
+ st.write(f"{sentiment}: {', '.join(keywords)}")
481
+
482
+ st.subheader("Special Cluster Details")
483
+ st.dataframe(special_cluster_data[['komentar', 'Cluster_Name', 'detected_by']])
484
+
485
+ corrected_comments = load_corrected_comments_from_excel(corrected_comments_file)
486
+ display_cluster_visualization(ba_lainnya_data)
487
+
488
+ st.subheader("Search and Filter Clusters")
489
+ search_term = st.text_input("Enter a keyword to search for in the comments:")
490
+
491
+ if search_term:
492
+ filtered_data = ba_lainnya_data[ba_lainnya_data['komentar'].str.contains(search_term, case=False, na=False)]
493
+ st.write(f"Filtered Data (Showing first 100 rows) for search term '{search_term}':")
494
+ st.dataframe(filtered_data.head(100))
495
+ else:
496
+ st.dataframe(ba_lainnya_data.head(100))
497
+
498
+ st.subheader("Clustered Data")
499
+ selected_cluster = st.selectbox("Select a cluster to view comments:", sorted(ba_lainnya_data['Cluster_Name'].unique()))
500
+ cluster_comments = ba_lainnya_data[ba_lainnya_data['Cluster_Name'] == selected_cluster]
501
+ st.dataframe(cluster_comments[['komentar', 'Cluster_Name']].head(100))
502
+
503
+ new_sentimen = st.selectbox("Select new sentiment for this cluster:", ['Co Likes', 'Co Support', 'Co Optimism', 'Co Negative', 'Co Sarkastic'])
504
+ if st.button("Update Sentiment for this cluster"):
505
+ ba_lainnya_data.loc[ba_lainnya_data['Cluster_Name'] == selected_cluster, 'corrected_by'] = 'Batch Cluster'
506
+ ba_lainnya_data.loc[ba_lainnya_data['Cluster_Name'] == selected_cluster, 'sentimen'] = new_sentimen
507
+ st.success(f"Sentiment for cluster {selected_cluster} updated to {new_sentimen}")
508
+
509
+ # Save and refresh Corrected Comments table and Cluster Visualization
510
+ corrected_comments = pd.concat([corrected_comments, ba_lainnya_data[ba_lainnya_data['corrected_by'] != "Not Corrected"]])
511
+ corrected_comments.drop_duplicates(subset=['komentar'], keep='last', inplace=True)
512
+ save_corrected_comments_to_excel(corrected_comments, corrected_comments_file)
513
+ st.subheader("Corrected Comments")
514
+ st.dataframe(corrected_comments[['komentar', 'Cluster_Name', 'corrected_by', 'sentimen']].head(100))
515
+ display_cluster_visualization(ba_lainnya_data)
516
+
517
+ st.subheader("Special Rules Based on Keywords")
518
+ keyword = st.text_input("Enter a keyword to set a rule:")
519
+ specific_cluster = st.selectbox("Select a cluster for this keyword:", sorted(ba_lainnya_data['Cluster_Name'].unique()))
520
+
521
+ if keyword:
522
+ new_cluster = st.selectbox("Select sentiment for this keyword:", ['Co Likes', 'Co Support', 'Co Optimism', 'Co Negative', 'Co Sarkastic'])
523
+ if st.button("Apply Rule"):
524
+ ba_lainnya_data.loc[ba_lainnya_data['komentar'].str.contains(keyword, case=False, na=False), 'Cluster_Name'] = new_cluster
525
+ ba_lainnya_data.loc[ba_lainnya_data['komentar'].str.contains(keyword, case=False, na=False), 'detected_by'] = specific_cluster
526
+ ba_lainnya_data.loc[ba_lainnya_data['komentar'].str.contains(keyword, case=False, na=False), 'corrected_by'] = 'Keyword Rule'
527
+ ba_lainnya_data.loc[ba_lainnya_data['komentar'].str.contains(keyword, case=False, na=False), 'sentimen'] = new_cluster
528
+ st.success(f"All comments containing '{keyword}' have been updated to '{new_cluster}' sentiment.")
529
+
530
+ # Update keywords.json file to avoid duplicates
531
+ if selected_candidate.replace(' ', '_') in keyword_dict:
532
+ if new_cluster in keyword_dict[selected_candidate.replace(' ', '_')]:
533
+ if keyword not in keyword_dict[selected_candidate.replace(' ', '_')][new_cluster]:
534
+ keyword_dict[selected_candidate.replace(' ', '_')][new_cluster].append(keyword)
535
+ else:
536
+ keyword_dict[selected_candidate.replace(' ', '_')][new_cluster] = [keyword]
537
+ else:
538
+ keyword_dict[selected_candidate.replace(' ', '_')] = {new_cluster: [keyword]}
539
+
540
+ with open('keywords.json', 'w') as f:
541
+ json.dump(keyword_dict, f)
542
+
543
+ st.success(f"Keyword '{keyword}' has been added to the keyword list.")
544
+
545
+ # Save and refresh Corrected Comments table and Cluster Visualization
546
+ corrected_comments = pd.concat([corrected_comments, ba_lainnya_data[ba_lainnya_data['corrected_by'] != "Not Corrected"]])
547
+ corrected_comments.drop_duplicates(subset=['komentar'], keep='last', inplace=True)
548
+ save_corrected_comments_to_excel(corrected_comments, corrected_comments_file)
549
+ st.subheader("Corrected Comments")
550
+ st.dataframe(corrected_comments[['komentar', 'Cluster_Name', 'corrected_by', 'sentimen']].head(100))
551
+ display_cluster_visualization(ba_lainnya_data)
552
+
553
+ st.subheader("Corrected Comments")
554
+ corrected_comments = load_corrected_comments_from_excel(corrected_comments_file)
555
+ st.dataframe(corrected_comments[['komentar', 'Cluster_Name', 'corrected_by', 'sentimen']].head(100))
556
+
557
+ st.subheader("Visual Representation of Corrected Comments")
558
+ sentiment_counts = corrected_comments['sentimen'].value_counts()
559
+ plt.figure(figsize=(10, 6))
560
+ plt.bar(sentiment_counts.index, sentiment_counts.values, color=['blue', 'green', 'orange', 'red', 'purple'])
561
+ plt.xlabel('Sentimen')
562
+ plt.ylabel('Number of Corrected Comments')
563
+ plt.title('Number of Corrected Comments by Sentiment')
564
+ plt.show()
565
+ st.pyplot(plt)
566
+
567
+ st.subheader("Download Options")
568
+ excel_buffer_cluster = io.BytesIO()
569
+ with pd.ExcelWriter(excel_buffer_cluster, engine='xlsxwriter') as writer:
570
+ ba_lainnya_data.to_excel(writer, index=False, sheet_name='Clustered Data')
571
+ excel_buffer_cluster.seek(0)
572
+
573
+ st.download_button(
574
+ label=f"Download Clustered Data for {selected_candidate}",
575
+ data=excel_buffer_cluster,
576
+ file_name=f"clustered_data_{selected_candidate}.xlsx",
577
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
578
+ )
579
+
580
+ st.subheader("Save Corrected Comments and Non-'BA Lainnya' Data to Training Dataset")
581
+ if st.button("Save All to Dataset"):
582
+ try:
583
+ # Combine the classification results without the 'BA Lainnya' tag and the corrected comments
584
+ combined_data = pd.concat([st.session_state['data_without_ba'], corrected_comments], ignore_index=True)
585
+ combined_data['tanggal_masuk'] = pd.Timestamp.now().strftime('%Y-%m-%d')
586
+ update_training_dataset(combined_data, st.session_state['candidate']) # call the dataset-update helper
587
+ st.success("Corrected comments and classified data without 'BA Lainnya' have been saved to the training dataset.")
588
+
589
+ # Clear the session state to prevent duplicate saves
590
+ st.session_state['data_with_ba'] = pd.DataFrame(columns=corrected_comments.columns)
591
+ st.session_state['data_without_ba'] = pd.DataFrame(columns=corrected_comments.columns)
592
+
593
+ # Rerun automatically after saving to dataset
594
+ st.rerun()
595
+ except Exception as e:
596
+ st.error(f"An error occurred while saving the data: {e}")
597
+
598
+ except FileNotFoundError:
599
+ st.error(f"No dataset found for {selected_candidate}. Please add data to create the dataset.")
600
+ except Exception as e:
601
+ st.error(f"An unexpected error occurred: {e}")
602
+
603
+ # === Sidebar Navigation === #
604
+ menu = st.sidebar.radio("Select a Feature", ["Model-Based Classification","Clustering for 'BA Lainnya'", "Update Keywords","View Training Dataset","Evaluate Data Train","Maximize Preprocessing","Retraining Model"])
605
+ if menu == "Model-Based Classification":
606
+ st.title("Model-Based Classification")
607
+ candidate = st.selectbox("Choose a candidate:", candidate_list)
608
+ model_path = f"/content/best_rf_model_{candidate.replace(' ', '_').lower()}.joblib"
609
+ vectorizer_path = f"/content/tfidf_vectorizer_{candidate.replace(' ', '_').lower()}.joblib"
610
+
611
+ # Save the selected candidate to session state
612
+ st.session_state['candidate'] = candidate
613
+
614
+ uploaded_file = st.file_uploader("Upload an Excel file for classification", type=['xlsx'])
615
+
616
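+ # Note: st.cache_resource is generally the recommended cache for model objects;
+ # st.cache_data also works here but serializes and copies the returned objects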
+ @st.cache_data
617
+ def load_model_and_vectorizer(model_path, vectorizer_path):
618
+ """Load model and vectorizer, cache them for efficiency."""
619
+ try:
620
+ model = joblib.load(model_path)
621
+ vectorizer = joblib.load(vectorizer_path)
622
+ return model, vectorizer
623
+ except FileNotFoundError:
624
+ return None, None
625
+
626
+ model, vectorizer = load_model_and_vectorizer(model_path, vectorizer_path)
627
+
628
+ if model is None or vectorizer is None:
629
+ st.error("Model or vectorizer not found for the selected candidate.")
630
+ st.stop()
631
+
632
+ # Save the vectorizer and model to session state
633
+ st.session_state['vectorizer'] = vectorizer
634
+ st.session_state['model'] = model
635
+
636
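+ # Comments are preprocessed in 10,000-row batches in parallel via joblib.Parallel,
+ # and the combined result is cached by st.cache_data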
+ @st.cache_data
637
+ def preprocess_data(data):
638
+ """Preprocess comments with batching for large datasets."""
639
+ from joblib import Parallel, delayed
640
+
641
+ def preprocess_batch(batch):
642
+ batch['translated_emojis'] = batch['komentar'].apply(translate_emojis)
643
+ batch['normalized_unicode'] = batch['translated_emojis'].apply(normalize_unicode)
644
+ batch['reply_handled'] = batch['normalized_unicode'].apply(handle_replies)
645
+ batch['clean_text'] = batch['reply_handled'].apply(clean_text)
646
+ batch['translated_ntb'] = batch['clean_text'].apply(lambda x: translate_text(x, ntb_dict))
647
+ batch['translated_slang'] = batch['translated_ntb'].apply(lambda x: translate_text(x, slang_dict))
648
+ batch['negation_handled'] = batch['translated_slang'].apply(handle_negation)
649
+ return batch
650
+
651
+ batch_size = 10000 # Process 10,000 rows at a time
652
+ batches = [data.iloc[i:i+batch_size] for i in range(0, len(data), batch_size)]
653
+ processed_batches = Parallel(n_jobs=-1)(delayed(preprocess_batch)(batch) for batch in batches)
654
+ return pd.concat(processed_batches, ignore_index=True)
655
+
656
+ # Process uploaded file
657
+ if uploaded_file:
658
+ try:
659
+ data = pd.read_excel(uploaded_file)
660
+ if 'komentar' not in data.columns:
661
+ st.error("The uploaded file must include a 'komentar' column.")
662
+ st.stop()
663
+ data = preprocess_data(data)
664
+ except Exception as e:
665
+ st.error(f"An error occurred while processing the file: {e}")
666
+ st.stop()
667
+ elif 'model_classified_data' in st.session_state:
668
+ data = st.session_state['model_classified_data']
669
+ else:
670
+ st.info("Please upload a file for classification.")
671
+ st.stop()
672
+
673
+ # Transform comments into TF-IDF vectors
674
+ try:
675
+ tfidf_data = vectorizer.transform(data['negation_handled'].fillna(''))
676
+ data['predicted_category'] = model.predict(tfidf_data)
677
+ data['probabilities'] = model.predict_proba(tfidf_data).tolist()
678
+ data['max_probability'] = data['probabilities'].apply(lambda x: max(x))
679
+ except Exception as e:
680
+ st.error(f"An error occurred during model prediction: {e}")
681
+ st.stop()
682
+
683
+ # Cache classified data
684
+ st.session_state['model_classified_data'] = data
685
+
686
+ # Interactive threshold adjustment
687
+ st.subheader("Set Threshold for 'BA Lainnya'")
688
+ threshold = st.slider("Threshold for tagging 'BA Lainnya'", min_value=0.0, max_value=1.0, value=0.80, step=0.01)
689
+
690
+ # Apply threshold to tag "BA Lainnya"
691
+ data['tag'] = data['max_probability'].apply(lambda x: 'BA Lainnya' if x < threshold else '')
692
+
693
+ # Separate data for visualization
694
+ data_without_ba = data[data['tag'] != 'BA Lainnya']
695
+ data_with_ba = data[data['tag'] == 'BA Lainnya']
696
+
697
+ # Save updated results to session state for dynamic updates
698
+ st.session_state['data_without_ba'] = data_without_ba
699
+ st.session_state['data_with_ba'] = data_with_ba
700
+
701
+ # Preview Results
702
+ st.subheader("Preview Results")
703
+
704
+ st.write("### 1. Hasil Klasifikasi Tanpa Tag 'BA Lainnya'")
705
+ if not data_without_ba.empty:
706
+ st.dataframe(data_without_ba[['komentar', 'predicted_category', 'max_probability']])
707
+ else:
708
+ st.info("No high-probability classifications available.")
709
+
710
+ st.write("### 2. Hasil Klasifikasi Dengan Tag 'BA Lainnya'")
711
+ if not data_with_ba.empty:
712
+ st.dataframe(data_with_ba[['komentar', 'predicted_category', 'max_probability']])
713
+ else:
714
+ st.info("No low-probability classifications available.")
715
+
716
+ # Visualization: Sentiment Distribution
717
+ st.subheader("Sentiment Distribution Visualization")
718
+
719
+ def plot_distribution(data, title):
720
+ sentiment_counts = data['predicted_category'].value_counts()
721
+ fig, ax = plt.subplots()
722
+ ax.bar(sentiment_counts.index, sentiment_counts.values)
723
+ ax.set_title(title)
724
+ ax.set_xlabel("Sentiments")
725
+ ax.set_ylabel("Count")
726
+ st.pyplot(fig)
727
+
728
+ if not data_without_ba.empty:
729
+ plot_distribution(data_without_ba, "Sentiment Distribution (Without 'BA Lainnya')")
730
+ if not data_with_ba.empty:
731
+ plot_distribution(data_with_ba, "Sentiment Distribution (With 'BA Lainnya')")
732
+
733
+ # Download Results
734
+ st.subheader("Download Results")
735
+ excel_buffer = io.BytesIO()
736
+ with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
737
+ data.to_excel(writer, index=False, sheet_name='Classification Results')
738
+ excel_buffer.seek(0)
739
+
740
+ st.download_button(
741
+ label="Download All Classification Results",
742
+ data=excel_buffer,
743
+ file_name=f"classification_results_{candidate}.xlsx",
744
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
745
+ )
746
+
747
+ # Save Non-"BA Lainnya" Data to Training Dataset
748
+ if not data_with_ba.empty:
749
+ st.warning("There are comments with 'BA Lainnya' tagging. Please proceed to 'Clustering for BA Lainnya'.")
750
+ elif not data_without_ba.empty:
751
+ st.subheader("Save Classified Data")
752
+ if st.button("Save Non-'BA Lainnya' Data to Training Dataset"):
753
+ try:
754
+ data_to_save = data_without_ba[['model_data', 'Platform', 'komentar', 'link', 'kandidat', 'tanggal']].copy()
755
+ data_to_save['sentimen'] = data_without_ba['predicted_category']
756
+ data_to_save['tanggal_masuk'] = pd.Timestamp.now().strftime('%Y-%m-%d')
757
+ update_training_dataset(data_to_save, candidate)
758
+ st.success("Data successfully saved to the training dataset.")
759
+ except Exception as e:
760
+ st.error(f"An error occurred while saving the data: {e}")
761
+ else:
762
+ st.info("No Non-'BA Lainnya' data available to save.")
763
+ pass # Placeholder
764
+
765
+ # Wire the function into the "Clustering for 'BA Lainnya'" page
766
+ if menu == "Clustering for 'BA Lainnya'":
767
+ selected_candidate = st.session_state.get('candidate') # the classification page stores the choice under 'candidate'
768
+ run_clustering_for_ba_lainnya()
769
+ pass # Placeholder
770
+
771
+ # Run this block only when the selected menu is "View Training Dataset"
772
+ if menu == "View Training Dataset":
773
+ st.title("View Training Dataset")
774
+
775
+ # Candidate selection header
776
+ st.header("Options")
777
+ selected_candidate = st.selectbox("Choose a candidate:", list(candidate_list), key='candidate_select_view')
778
+
779
+ # Dataset paths
780
+ dataset_path = f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx"
781
+ history_path = f"history_{selected_candidate.lower().replace(' ', '_')}.json"
782
+
783
+ # Load the dataset
784
+ try:
785
+ df = pd.read_excel(dataset_path)
786
+
787
+ # Make sure the required columns exist
788
+ required_columns = ['model_data', 'Platform', 'komentar', 'sentimen', 'tanggal', 'tanggal_masuk', 'link', 'evaluated_by_data_train']
789
+ for col in required_columns:
790
+ if col not in df.columns:
791
+ if col == 'evaluated_by_data_train':
792
+ df[col] = False
793
+
794
+ # Add the 'update_ba' column if it does not exist yet
795
+ if 'update_ba' not in df.columns:
796
+ df['update_ba'] = None
797
+
798
+ # Add the 'missing_comment' column if it does not exist yet
799
+ if 'missing_comment' not in df.columns:
800
+ df['missing_comment'] = False
801
+
802
+ # Ensure 'tanggal_masuk' and 'tanggal' are in proper datetime format
803
+ df['tanggal_masuk'] = pd.to_datetime(df['tanggal_masuk'], errors='coerce')
804
+ df['tanggal'] = pd.to_datetime(df['tanggal'], errors='coerce')
805
+
806
+ # Keep only the date part of the date columns (drop the time)
807
+ df['tanggal'] = df['tanggal'].dt.date
808
+ df['tanggal_masuk'] = df['tanggal_masuk'].dt.date
809
+ df['update_ba'] = pd.to_datetime(df['update_ba'], errors='coerce').dt.date
810
+
811
+ # Handle NaT (Not a Time) values if present
812
+ if df['tanggal_masuk'].isnull().any():
813
+ st.warning("Some dates 'tanggal_masuk' could not be parsed correctly. Please check the date format in the dataset.")
814
+ df['tanggal_masuk'].fillna(pd.Timestamp.now().date(), inplace=True)
815
+
816
+ if df['tanggal'].isnull().any():
817
+ st.warning("Some dates 'tanggal' could not be parsed correctly. Please check the date format in the dataset.")
818
+ df['tanggal'].fillna(pd.Timestamp.now().date(), inplace=True)
819
+
820
+ # Add and fill the 'kandidat' column if it does not exist yet
821
+ if 'kandidat' not in df.columns:
822
+ df['kandidat'] = selected_candidate
823
+
824
+ # Take the subset of required columns
825
+ df = df[required_columns + ['update_ba', 'kandidat', 'missing_comment']]
826
+
827
+ # Replace missing update_ba values with tanggal_masuk
828
+ df['update_ba'].fillna(df['tanggal_masuk'], inplace=True)
829
+
830
+ # Show basic statistics
831
+ st.subheader(f"Training Dataset for {selected_candidate}")
832
+ st.write(f"**Total rows in dataset:** {len(df)}")
833
+
834
+ if not df.empty:
835
+ # Visualize the distribution of BA updates
836
+ st.subheader("Visualisasi Postingan Berdasarkan Update BA")
837
+ ba_update_counts = df['update_ba'].value_counts().sort_index()
838
+ fig, ax = plt.subplots(figsize=(10, 6))
839
+ ba_update_counts.plot(kind='bar', ax=ax, color='blue')
840
+ ax.set_title('Sebaran Postingan Berdasarkan Update BA')
841
+ ax.set_xlabel('Tanggal Update BA')
842
+ ax.set_ylabel('Jumlah Postingan')
843
+ plt.xticks(rotation=45)
844
+ plt.tight_layout()
845
+ st.pyplot(fig)
846
+
847
+ # Additional visualization: platform distribution
848
+ st.subheader("Sebaran Platform Berdasarkan Update BA")
849
+ platform_counts = df['Platform'].value_counts()
850
+ fig, ax = plt.subplots(figsize=(10, 6))
851
+ platform_counts.plot(kind='bar', ax=ax, color='green')
852
+ ax.set_title('Sebaran Platform Berdasarkan Update BA')
853
+ ax.set_xlabel('Platform')
854
+ ax.set_ylabel('Jumlah Postingan')
855
+ plt.xticks(rotation=45)
856
+ plt.tight_layout()
857
+ st.pyplot(fig)
858
+
859
+ # Visualize the number of missing comments per platform
860
+ st.subheader("Jumlah Komentar Hilang Berdasarkan Platform")
861
+ missing_comments_by_platform = df.groupby('Platform')['missing_comment'].sum().sort_index()
862
+ fig, ax = plt.subplots(figsize=(10, 6))
863
+ missing_comments_by_platform.plot(kind='bar', ax=ax, color='red')
864
+ ax.set_title('Jumlah Komentar Hilang Berdasarkan Platform')
865
+ ax.set_xlabel('Platform')
866
+ ax.set_ylabel('Jumlah Komentar Hilang')
867
+ plt.xticks(rotation=45)
868
+ plt.tight_layout()
869
+ st.pyplot(fig)
870
+
871
+ # Filter by validation status
872
+ st.subheader("Filter Data")
873
+ validation_filter = st.radio(
874
+ "Choose data type to view:",
875
+ ["All Data", "Validated Data", "Non-Validated Data"],
876
+ key='validation_filter'
877
+ )
878
+
879
+ if validation_filter == "Validated Data":
880
+ filtered_data = df[df['evaluated_by_data_train'] == True]
881
+ elif validation_filter == "Non-Validated Data":
882
+ filtered_data = df[df['evaluated_by_data_train'] == False]
883
+ else:
884
+ filtered_data = df
885
+
886
+ if not filtered_data.empty:
887
+ st.subheader(f"Filtered Data: {validation_filter}")
888
+ st.dataframe(filtered_data) # show all rows matching the filter
889
+ else:
890
+ st.warning("Tidak ada data yang sesuai dengan filter yang dipilih.")
891
+
892
+ # Show the history of data additions
893
+ st.subheader("History of Data Additions")
894
+ try:
895
+ with open(history_path, "r") as f:
896
+ history = json.load(f)
897
+
898
+ history_list = []
899
+ for key, value in history.items():
900
+ for entry in value:
901
+ for k, v in entry.items():
902
+ history_list.append({
903
+ 'key': key,
904
+ 'field': k,
905
+ 'date': v
906
+ })
907
+
908
+ history_df = pd.DataFrame(history_list)
909
+ st.dataframe(history_df)
910
+ except FileNotFoundError:
911
+ st.write("No addition history available.")
912
+ except ValueError as e:
913
+ st.error(f"An error occurred while loading history data: {e}")
914
+
915
+ # Option to download the filtered dataset
916
+ st.subheader("Download Options")
917
+ if not filtered_data.empty:
918
+ excel_buffer = io.BytesIO()
919
+ with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
920
+ filtered_data.to_excel(writer, index=False, sheet_name='Filtered Dataset')
921
+ excel_buffer.seek(0)
922
+
923
+ st.download_button(
924
+ label=f"Download Filtered Dataset for {selected_candidate}",
925
+ data=excel_buffer,
926
+ file_name=f"filtered_training_dataset_{selected_candidate}.xlsx",
927
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
928
+ )
929
+
930
+ # Option to download the full dataset
931
+ if not df.empty:
932
+ excel_buffer_full = io.BytesIO()
933
+ with pd.ExcelWriter(excel_buffer_full, engine='xlsxwriter') as writer:
934
+ df.to_excel(writer, index=False, sheet_name='Training Dataset')
935
+ excel_buffer_full.seek(0)
936
+
937
+ st.download_button(
938
+ label=f"Download Full Training Dataset for {selected_candidate}",
939
+ data=excel_buffer_full,
940
+ file_name=f"training_dataset_{selected_candidate}.xlsx",
941
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
942
+ )
943
+
944
+ # Download data aggregated by date range
945
+ st.subheader("Download Aggregated Data by Date")
946
+
947
+ # Get the minimum and maximum dates as defaults for date_input
948
+ if not df['tanggal'].empty:
949
+ min_date = df['tanggal'].min()
950
+ max_date = df['tanggal'].max()
951
+
952
+ selected_start_date = st.date_input("Select start date for aggregation:", value=min_date)
953
+ selected_end_date = st.date_input("Select end date for aggregation:", value=max_date)
954
+
955
+ agg_filtered_data = df[(df['tanggal'] >= selected_start_date) & (df['tanggal'] <= selected_end_date)]
956
+
957
+ if not agg_filtered_data.empty:
958
+ # Make sure the 'kandidat' column exists and is filled
959
+ agg_filtered_data['kandidat'] = selected_candidate
960
+
961
+ aggregated_data = agg_filtered_data.groupby(['kandidat', 'link', 'tanggal', 'Platform', 'model_data', 'update_ba']).agg(
962
+ co_likes=('sentimen', lambda x: (x == 'Co Likes').sum()),
963
+ co_support=('sentimen', lambda x: (x == 'Co Support').sum()),
964
+ co_optimism=('sentimen', lambda x: (x == 'Co Optimism').sum()),
965
+ co_sarkastic=('sentimen', lambda x: (x == 'Co Sarkastic').sum()),
966
+ co_negative=('sentimen', lambda x: (x == 'Co Negative').sum()),
967
+ missing_comments=('missing_comment', 'sum') # aggregate the missing_comment counts
968
+ ).reset_index()
969
+
970
+ # Add derived columns for comment counts
971
+ aggregated_data['jumlah_komentar_positif'] = aggregated_data['co_likes'] + aggregated_data['co_support'] + aggregated_data['co_optimism']
972
+ aggregated_data['jumlah_komentar_negatif'] = aggregated_data['co_sarkastic'] + aggregated_data['co_negative']
973
+ aggregated_data['jumlah_komentar'] = aggregated_data[['co_likes', 'co_support', 'co_optimism', 'co_sarkastic', 'co_negative']].sum(axis=1)
974
+
975
+ st.dataframe(aggregated_data)
976
+
977
+ # Visualize BA-updated posts by platform within the selected date range
978
+ st.subheader("Visualisasi Postingan yang Diupdate BA Berdasarkan Rentang Tanggal")
979
+ ba_update_range = aggregated_data[aggregated_data['update_ba'] != 'Belum diupdate']
980
+ if not ba_update_range.empty:
981
+ plt.figure(figsize=(10, 6))
982
+ ba_update_range['Platform'].value_counts().plot(kind='bar', title='Sebaran Platform - Diupdate BA (Rentang Tanggal)')
983
+ plt.xlabel('Platform')
984
+ plt.ylabel('Jumlah Postingan')
985
+ st.pyplot(plt)
986
+
987
+ # Additional visualization: posts by date
988
+ st.subheader("Sebaran Postingan Berdasarkan Tanggal")
989
+ plt.figure(figsize=(10, 6))
990
+ ba_update_range['tanggal'].value_counts().sort_index().plot(kind='bar', title='Sebaran Postingan Berdasarkan Tanggal')
991
+ plt.xlabel('Tanggal')
992
+ plt.ylabel('Jumlah Postingan')
993
+ plt.xticks(rotation=45)
994
+ plt.tight_layout()
995
+ st.pyplot(plt)
996
+
997
+ # Additional visualization: BA update distribution
998
+ st.subheader("Sebaran Update BA")
999
+ plt.figure(figsize=(10, 6))
1000
+ ba_update_range['update_ba'].value_counts().sort_index().plot(kind='bar', title='Sebaran Update BA')
1001
+ plt.xlabel('Tanggal Update BA')
1002
+ plt.ylabel('Jumlah Postingan')
1003
+ plt.xticks(rotation=45)
1004
+ plt.tight_layout()
1005
+ st.pyplot(plt)
1006
+
1007
+ # Visualize comment counts by tanggal_masuk (entry date)
1008
+ st.subheader("Jumlah Komentar Berdasarkan Tanggal Masuk")
1009
+ plt.figure(figsize=(10, 6))
1010
+ agg_filtered_data.groupby('tanggal_masuk')['komentar'].count().sort_index().plot(kind='bar', title='Jumlah Komentar Berdasarkan Tanggal Masuk')
1011
+ plt.xlabel('Tanggal Masuk')
1012
+ plt.ylabel('Jumlah Komentar')
1013
+ plt.xticks(rotation=45)
1014
+ plt.tight_layout()
1015
+ st.pyplot(plt)
1016
+
1017
+ # Visualize the comment distribution per platform
1018
+ st.subheader("Sebaran Komentar di Tiap Platform")
1019
+ plt.figure(figsize=(10, 6))
1020
+ agg_filtered_data['Platform'].value_counts().plot(kind='bar', title='Sebaran Komentar di Tiap Platform')
1021
+ plt.xlabel('Platform')
1022
+ plt.ylabel('Jumlah Komentar')
1023
+ plt.xticks(rotation=45)
1024
+ plt.tight_layout()
1025
+ st.pyplot(plt)
1026
+
1027
+ # Visualize the number of missing comments per post
1028
+ st.subheader("Jumlah Komentar Hilang Berdasarkan Postingan")
1029
+ plt.figure(figsize=(10, 6))
1030
+ aggregated_data.groupby('link')['missing_comments'].sum().sort_index().plot(kind='bar', title='Jumlah Komentar Hilang Berdasarkan Postingan')
1031
+ plt.xlabel('Link')
1032
+ plt.ylabel('Jumlah Komentar Hilang')
1033
+ plt.xticks(rotation=45)
1034
+ plt.tight_layout()
1035
+ st.pyplot(plt)
1036
+ else:
1037
+ st.warning("Tidak ada data yang diupdate BA untuk rentang tanggal yang dipilih.")
1038
+
1039
+ # Option to download the aggregated dataset
1040
+ excel_buffer_aggregated = io.BytesIO()
1041
+ with pd.ExcelWriter(excel_buffer_aggregated, engine='xlsxwriter') as writer:
1042
+ aggregated_data.to_excel(writer, index=False, sheet_name='Aggregated Data')
1043
+ excel_buffer_aggregated.seek(0)
1044
+
1045
+ st.download_button(
1046
+ label=f"Download Aggregated Data by Date for {selected_candidate}",
1047
+ data=excel_buffer_aggregated,
1048
+ file_name=f"aggregated_data_{selected_candidate}.xlsx",
1049
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
1050
+ )
1051
+ else:
1052
+ st.warning("Tidak ada data yang sesuai dengan rentang tanggal yang dipilih untuk diagregasi.")
1053
+ else:
1054
+ st.warning("Tidak ada data dalam dataset untuk divisualisasikan.")
1055
+ else:
1056
+ st.warning("Tidak ada data dalam dataset untuk divisualisasikan.")
1057
+ except FileNotFoundError:
1058
+ st.error(f"No training dataset found for {selected_candidate}. Please add data to create the dataset.")
1059
+ except Exception as e:
1060
+ st.error(f"An error occurred: {e}")
1061
+ pass
1062
+
1063
+ if menu == "Evaluate Data Train":
1064
+ st.title("Evaluate Data Train")
1065
+
1066
+ selected_candidate = st.selectbox("Choose a candidate:", list(candidate_list), key='candidate_select_evaluate')
1067
+ dataset_path = f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx"
1068
+
1069
+ try:
1070
+ df = pd.read_excel(dataset_path)
1071
+
1072
+ # Load existing keyword dictionary
1073
+ try:
1074
+ with open('keywords.json', 'r') as f:
1075
+ keyword_dict = json.load(f)
1076
+ st.success("keywords.json loaded successfully.")
1077
+ except FileNotFoundError:
1078
+ st.error("keywords.json file not found. Please ensure the file is in the correct directory.")
1079
+ st.stop()
1080
+ except json.JSONDecodeError:
1081
+ st.error("keywords.json file is not a valid JSON. Please check the file format.")
1082
+ st.stop()
1083
+
1084
+ # Select candidate-specific keywords
1085
+ candidate_key = selected_candidate.replace(' ', '_')
1086
+ candidate_keywords = keyword_dict.get(candidate_key)
1087
+
1088
+ if not candidate_keywords:
1089
+ st.error(f"No keywords found for the selected candidate '{selected_candidate}'. Please update the 'keywords.json' file with appropriate keywords.")
1090
+ st.stop()
1091
+
1092
+ keywords = [kw for sentiment_keywords in candidate_keywords.values() for kw in sentiment_keywords]
1093
+
1094
+ # Data consistency validation
1095
+ st.subheader("Data Consistency Validation")
1096
+ missing_values = df.isnull().sum()
1097
+ st.write("Missing values in each column:")
1098
+ st.write(missing_values)
1099
+
1100
+ # Drop rows with missing values
1101
+ st.write("Removing rows with missing values...")
1102
+ df.dropna(inplace=True)
1103
+
1104
+ # Sentiment distribution
1105
+ st.subheader("Sentiment Distribution")
1106
+ sentiment_counts = df['sentimen'].value_counts()
1107
+ st.write("Number of comments for each sentiment:")
1108
+ st.write(sentiment_counts)
1109
+
1110
+ # Data quality check
1111
+ st.subheader("Data Quality Check")
1112
+ invalid_entries = df[df['komentar'].str.len() == 0]
1113
+ st.write(f"Number of invalid comments (empty): {len(invalid_entries)}")
1114
+ if len(invalid_entries) > 0:
1115
+ st.write("Invalid comments (empty):")
1116
+ st.dataframe(invalid_entries.head(100)) # preview the first 100 rows
1117
+
1118
+ # Detect inconsistent sentiment labels
1119
+ st.subheader("Inconsistent Sentiment Labels")
1120
+ duplicate_comments = df[df.duplicated(subset=['komentar'], keep=False)]
1121
+ inconsistent_labels = duplicate_comments.groupby('komentar')['sentimen'].nunique()
1122
+ inconsistent_labels = inconsistent_labels[inconsistent_labels > 1]
1123
+ if not inconsistent_labels.empty:
1124
+ inconsistent_labels_df = duplicate_comments[duplicate_comments['komentar'].isin(inconsistent_labels.index)]
1125
+ st.write(f"Number of comments with inconsistent sentiment labels: {len(inconsistent_labels_df)}")
1126
+ st.dataframe(inconsistent_labels_df.head(100)) # preview the first 100 rows
1127
+ else:
1128
+ st.write("No comments with inconsistent sentiment labels found.")
1129
+
1130
+ # Problem handling
1131
+ st.subheader("Problem Handling")
1132
+
1133
+ # Remove empty comments
1134
+ st.write("Removing invalid (empty) comments...")
1135
+ df = df[df['komentar'].str.len() > 0]
1136
+
1137
+ # Interactive: resolve comments with inconsistent sentiment labels
1138
+ st.write("Resolving inconsistent sentiment labels...")
1139
+ if not inconsistent_labels.empty:
1140
+ for index, row in inconsistent_labels_df.iterrows():
1141
+ st.write(f"Comment: {row['komentar']}")
1142
+ sentimen_options = df[df['komentar'] == row['komentar']]['sentimen'].unique().tolist()
1143
+ new_sentimen = st.selectbox("Select correct sentiment", sentimen_options, key=f'sentimen_{index}')
1144
+ if st.button("Update Sentiment", key=f'update_{index}'):
1145
+ update_sentiment(index, new_sentimen)
1146
+
1147
+ # Clustering using keywords and the sarcasm model
1148
+ st.write("Clustering comments using keywords and sarcasm model...")
1149
+ keyword_vectorizer = TfidfVectorizer(vocabulary=keywords)
1150
+ X_keywords = keyword_vectorizer.fit_transform(df['komentar'])
1151
+ kmeans = KMeans(n_clusters=10, random_state=0).fit(X_keywords)
1152
+ df['cluster'] = kmeans.labels_
1153
+
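+ # Illustrative sketch (not part of the original app): n_clusters is fixed at 10 above; if that
+ # choice needs a sanity check, a silhouette sweep over a few values of k is one common option.
+ # from sklearn.metrics import silhouette_score
+ # for k in range(2, 11):
+ #     trial_labels = KMeans(n_clusters=k, random_state=0).fit_predict(X_keywords)
+ #     st.write(f"k={k}: silhouette={silhouette_score(X_keywords, trial_labels):.3f}")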
1154
+ # Identify clusters that need review
1155
+ cluster_sizes = df['cluster'].value_counts()
+ review_clusters = df[df['cluster'].isin(cluster_sizes[cluster_sizes > 10].index)]
1156
+ st.write("Clusters identified for review:")
1157
+ st.dataframe(review_clusters.head(100))  # Preview the first 100 rows
1158
+
1159
+ # Prompt the user to add new keywords
1160
+ st.warning("Some comments were not captured by the current keywords. Please add new keywords in the 'Update Keywords' section.")
1161
+
1162
+ # Cluster visualization
1163
+ cluster_counts = df['cluster'].value_counts()
1164
+ st.write("Number of comments in each cluster:")
1165
+ st.write(cluster_counts)
1166
+
1167
+ # Display clustering results
1168
+ st.write("Comments clustered by patterns:")
1169
+ st.dataframe(df.head(100))  # Preview the first 100 rows
1170
+
1171
+ # Export the analysis and handling results
1172
+ st.subheader("Export Final Data")
1173
+ json_buffer = io.BytesIO()
1174
+ df.to_json(json_buffer, orient='records', lines=True)
1175
+ json_buffer.seek(0)
1176
+ st.download_button(
1177
+ label=f"Download Final Data for {selected_candidate}",
1178
+ data=json_buffer,
1179
+ file_name=f"final_data_{selected_candidate}.json",
1180
+ mime="application/json"
1181
+ )
1182
+
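+ # Usage note (illustrative): the export above is JSON Lines, so it can be reloaded later with
+ # pd.read_json(path, orient='records', lines=True).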
1183
+ except FileNotFoundError:
1184
+ st.error(f"No training dataset found for {selected_candidate}. Please add data to create the dataset.")
1185
+ except Exception as e:
1186
+ st.error(f"An error occurred: {e}")
1187
+
1188
+ pass # Placeholder
1189
+
1190
+ if menu == "Retraining Model":
1191
+ st.title("Retrain Model")
1192
+ selected_candidate = st.selectbox("Select a candidate to retrain the model:", list(candidate_list))
1193
+
1194
+ dataset_path = f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx"
1195
+ model_path = f"best_rf_model_{selected_candidate.lower().replace(' ', '_')}.joblib"
1196
+ vectorizer_path = f"tfidf_vectorizer_{selected_candidate.lower().replace(' ', '_')}.joblib"
1197
+ retrain_history_path = f"retrain_history_{selected_candidate.lower().replace(' ', '_')}.json"
1198
+
1199
+ try:
1200
+ # Load dataset
1201
+ data = pd.read_excel(dataset_path)
1202
+
1203
+ # Ensure 'komentar' is string
1204
+ if 'komentar' not in data.columns:
1205
+ st.error("Dataset must include a 'komentar' column.")
1206
+ st.stop()
1207
+ else:
1208
+ # Convert all comments to string
1209
+ data['komentar'] = data['komentar'].fillna('').astype(str)
1210
+
1211
+ # Separate validated and unvalidated data
1212
+ if 'evaluated_by_cluster' in data.columns:
1213
+ validated_data = data[data['evaluated_by_cluster'] == True]
1214
+ unvalidated_data = data[data['evaluated_by_cluster'] == False]
1215
+ else:
1216
+ validated_data = pd.DataFrame(columns=data.columns)
1217
+ unvalidated_data = data
1218
+
1219
+ st.write(f"**Validated Data:** {len(validated_data)} rows")
1220
+ st.write(f"**Unvalidated Data:** {len(unvalidated_data)} rows")
1221
+
1222
+ # Check if all data is validated
1223
+ if len(unvalidated_data) > 0:
1224
+ st.warning("Model retraining is only allowed if all data has been validated through 'Evaluate Clustering'. Please ensure all data is validated before retraining the model.")
1225
+ st.stop()
1226
+
1227
+ # Combine all data for preprocessing
1228
+ combined_data = validated_data # Only use validated data
1229
+
1230
+ # Preprocessing Function
1231
+ @st.cache_data(show_spinner=True)
1232
+ def preprocess_data(data):
1233
+ from joblib import Parallel, delayed
1234
+
1235
+ def preprocess_comment(comment):
1236
+ comment = translate_emojis(comment)
1237
+ comment = normalize_unicode(comment)
1238
+ comment = handle_replies(comment)
1239
+ comment = clean_text(comment)
1240
+ comment = translate_text(comment, ntb_dict)
1241
+ comment = translate_text(comment, slang_dict)
1242
+ comment = handle_negation(comment)
1243
+ return comment
1244
+
1245
+ data['processed_comments'] = Parallel(n_jobs=-1)(
1246
+ delayed(preprocess_comment)(c) for c in data['komentar']
1247
+ )
1248
+ return data
1249
+
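+ # Note: Parallel(n_jobs=-1) above fans the per-comment preprocessing out across all CPU cores,
+ # and st.cache_data caches the result so Streamlit reruns on the same dataset skip this step.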
1250
+ # Preprocessing
1251
+ st.write("Starting preprocessing...")
1252
+ combined_data = preprocess_data(combined_data)
1253
+
1254
+ if st.button("Retrain Model"):
1255
+ # Vectorization
1256
+ st.write("Vectorizing data...")
1257
+ vectorizer = TfidfVectorizer(ngram_range=(1, 1), max_features=5000)
1258
+ X = vectorizer.fit_transform(combined_data['processed_comments'])
1259
+ y = combined_data['sentimen']
1260
+
1261
+ # Split Data
1262
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
1263
+
1264
+ # Handle Class Imbalance with SMOTE
1265
+ st.write("Balancing data with SMOTE...")
1266
+ smote = SMOTE(random_state=42, n_jobs=-1)
1267
+ X_train_res, y_train_res = smote.fit_resample(X_train, y_train)
1268
+
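+ # Optional check (illustrative, not in the original flow): compare class counts before and
+ # after resampling to confirm SMOTE actually balanced the minority sentiment classes.
+ # st.write("Class counts before SMOTE:", pd.Series(y_train).value_counts().to_dict())
+ # st.write("Class counts after SMOTE:", pd.Series(y_train_res).value_counts().to_dict())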
1269
+ # Train Random Forest Model
1270
+ st.write("Training Random Forest model...")
1271
+ rf_model = RandomForestClassifier(n_estimators=200, max_depth=20, random_state=42)
1272
+ rf_model.fit(X_train_res, y_train_res)
1273
+
1274
+ # Evaluate on Training Data
1275
+ st.write("Evaluating model...")
1276
+ y_pred_train = rf_model.predict(X_train)
1277
+ accuracy_train = accuracy_score(y_train, y_pred_train)
1278
+ report_train = classification_report(y_train, y_pred_train, output_dict=True)
1279
+
1280
+ # Evaluate on Test Data
1281
+ y_pred_test = rf_model.predict(X_test)
1282
+ accuracy_test = accuracy_score(y_test, y_pred_test)
1283
+ report_test = classification_report(y_test, y_pred_test, output_dict=True)
1284
+
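+ # Hypothetical extension (assumption, not in the original code): the hyperparameters above are
+ # fixed; GridSearchCV, already imported at the top of this file, could be used to tune them.
+ # param_grid = {"n_estimators": [100, 200], "max_depth": [10, 20, None]}
+ # search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=3, scoring="f1_weighted")
+ # search.fit(X_train_res, y_train_res)
+ # rf_model = search.best_estimator_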
1285
+ # Save Model and Vectorizer
1286
+ st.write("Saving model and vectorizer...")
1287
+ joblib.dump(rf_model, model_path)
1288
+ joblib.dump(vectorizer, vectorizer_path)
1289
+
1290
+ # Log Retraining History
1291
+ st.write("Logging retraining history...")
1292
+ try:
1293
+ with open(retrain_history_path, "r") as f:
1294
+ retrain_history = json.load(f)
1295
+ except FileNotFoundError:
1296
+ retrain_history = []
1297
+
1298
+ retrain_history.append({
1299
+ "date_retrained": pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),
1300
+ "accuracy_on_train": accuracy_train,
1301
+ "accuracy_on_test": accuracy_test,
1302
+ "f1_score_on_train": report_train['weighted avg']['f1-score'],
1303
+ "f1_score_on_test": report_test['weighted avg']['f1-score'],
1304
+ })
1305
+
1306
+ with open(retrain_history_path, "w") as f:
1307
+ json.dump(retrain_history, f, indent=4)
1308
+
1309
+ # Display Results
1310
+ st.success(f"Model retrained successfully! Accuracy on training data: {accuracy_train:.4f}, Accuracy on test data: {accuracy_test:.4f}")
1311
+ st.subheader("Model Metrics on Training Data")
1312
+ st.table(pd.DataFrame(report_train).T)
1313
+ st.subheader("Model Metrics on Test Data")
1314
+ st.table(pd.DataFrame(report_test).T)
1315
+
1316
+ # Show Retrain History
1317
+ st.subheader("Retrain History")
1318
+ st.json(retrain_history)
1319
+
1320
+ except FileNotFoundError:
1321
+ st.error(f"No training dataset found for {selected_candidate}. Please add data to create the dataset.")
1322
+ except Exception as e:
1323
+ st.error(f"An unexpected error occurred: {e}")
1324
+
1325
+ pass # Placeholder
1326
+
1327
+ if menu == "Maximize Preprocessing":
1328
+ st.title("Maximize Preprocessing")
1329
+
1330
+ # Load Dataset Train
1331
+ candidate = st.selectbox("Choose a candidate:", list(candidate_list))
1332
+ dataset_path = f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx"
1333
+ try:
1334
+ # Load dataset
1335
+ data = pd.read_excel(dataset_path)
1336
+
1337
+ # Ensure 'komentar' is string
1338
+ if 'komentar' not in data.columns:
1339
+ st.error("Dataset must include a 'komentar' column.")
1340
+ st.stop()
1341
+ data['komentar'] = data['komentar'].fillna('').astype(str)
1342
+
1343
+ # Preprocessing Steps
1344
+ @st.cache_data(show_spinner=True)
1345
+ def preprocess_data(data):
1346
+ st.write("Starting preprocessing...")
1347
+ data['translated_emojis'] = data['komentar'].apply(translate_emojis)
1348
+ data['normalized_unicode'] = data['translated_emojis'].apply(normalize_unicode)
1349
+ data['reply_handled'] = data['normalized_unicode'].apply(handle_replies)
1350
+ data['clean_text'] = data['reply_handled'].apply(clean_text)
1351
+ data['translated_ntb'] = data['clean_text'].apply(lambda x: translate_text(x, ntb_dict))
1352
+ data['translated_slang'] = data['translated_ntb'].apply(lambda x: translate_text(x, slang_dict))
1353
+ data['negation_handled'] = data['translated_slang'].apply(handle_negation)
1354
+ return data
1355
+
1356
+ data = preprocess_data(data)
1357
+
1358
+ # Check Unmapped Words
1359
+ st.subheader("Check for Unmapped Words")
1360
+ all_words = (word.lower() for comment in data['negation_handled'] for word in comment.split())
1361
+ unique_words = set(all_words)
1362
+ ntb_dict_keys = set(ntb_dict.keys())
1363
+ slang_dict_keys = set(slang_dict.keys())
1364
+ mapped_words = ntb_dict_keys.union(slang_dict_keys)
1365
+ unmapped_words = sorted(unique_words - mapped_words)
1366
+
1367
+ if unmapped_words:
1368
+ st.write(f"Found **{len(unmapped_words)} unmapped words.**")
1369
+
1370
+ # Let the user choose how many words to display
1371
+ max_words = st.slider(
1372
+ "Select number of words to display:",
1373
+ min_value=10,
1374
+ max_value=len(unmapped_words),
1375
+ value=min(50, len(unmapped_words)),
1376
+ step=10,
1377
+ )
1378
+
1379
+ # Build a DataFrame for table display
1380
+ unmapped_df = pd.DataFrame(unmapped_words, columns=["Unmapped Words"])
1381
+ st.dataframe(unmapped_df.head(max_words))
1382
+
1383
+ # Show how many of the total unmapped words are displayed
1384
+ st.caption(f"Showing {min(max_words, len(unmapped_words))} out of {len(unmapped_words)} unmapped words.")
1385
+ else:
1386
+ st.success("No unmapped words found!")
1387
+
1388
+ # Add Words to Dictionary
1389
+ st.subheader("Add New Words to Dictionary")
1390
+ new_word = st.text_input("Enter new word:")
1391
+ normalized_word = st.text_input("Enter normalized form:")
1392
+ dictionary_choice = st.radio("Select dictionary to update:", ["Kamus Alay", "Kamus ntb"])
1393
+
1394
+ if st.button("Add to Dictionary"):
1395
+ if new_word and normalized_word:
1396
+ if dictionary_choice == "Kamus Alay":
1397
+ slang_dict[new_word.lower()] = normalized_word
1398
+ st.success(f"Added '{new_word}' -> '{normalized_word}' to Kamus Alay.")
1399
+ elif dictionary_choice == "Kamus ntb":
1400
+ ntb_dict[new_word.lower()] = normalized_word
1401
+ st.success(f"Added '{new_word}' -> '{normalized_word}' to Kamus ntb.")
1402
+ else:
1403
+ st.warning("Please enter both the new word and its normalized form.")
1404
+
1405
+ # Save Updates to File
1406
+ st.subheader("Save Updated Dictionaries")
1407
+ if st.button("Save Kamus Alay"):
1408
+ kamus_alay_path = '/content/kamusalay.csv' # Adjust the path as needed
1409
+ pd.DataFrame(list(slang_dict.items()), columns=["slang", "formal"]).to_csv(kamus_alay_path, index=False)
1410
+ st.success(f"Kamus Alay saved successfully to {kamus_alay_path}.")
1411
+
1412
+ if st.button("Save Kamus ntb"):
1413
+ kamus_ntb_path = '/content/ntb_dict.json' # Adjust the path as needed
1414
+ with open(kamus_ntb_path, 'w', encoding='utf-8') as f:
1415
+ json.dump(ntb_dict, f, indent=4)
1416
+ st.success(f"Kamus ntb saved successfully to {kamus_ntb_path}.")
1417
+ except FileNotFoundError:
1418
+ st.error(f"No training dataset found for {candidate}. Please ensure the dataset is available.")
1419
+ except Exception as e:
1420
+ st.error(f"An unexpected error occurred: {e}")
1421
+
1422
+ pass # Placeholder
1423
+
1424
+ if menu == "Update Keywords":
1425
+ st.title("Update Keywords")
1426
+
1427
+ # Load existing keyword dictionary
1428
+ with open('keywords.json', 'r') as f:
1429
+ keyword_dict = json.load(f)
1430
+
1431
+ # Show current keywords
1432
+ st.subheader("Current Keywords")
1433
+ candidate = st.selectbox("Select candidate", list(keyword_dict.keys()))
1434
+ for sentiment, keywords in keyword_dict[candidate].items():
1435
+ st.write(f"{sentiment}: {', '.join(keywords)}")
1436
+
1437
+ # Add new keyword
1438
+ st.subheader("Add New Keyword")
1439
+ new_keyword = st.text_input("Enter new keyword")
1440
+ selected_sentiment = st.selectbox("Select sentiment for new keyword", list(keyword_dict[candidate].keys()))
1441
+
1442
+ if st.button("Add Keyword"):
1443
+ if new_keyword and selected_sentiment:
1444
+ keyword_dict[candidate][selected_sentiment].append(new_keyword)
1445
+ with open('keywords.json', 'w') as f:
1446
+ json.dump(keyword_dict, f, indent=4)
1447
+ st.success(f"Keyword '{new_keyword}' added to {selected_sentiment} for {candidate}")
1448
+ else:
1449
+ st.error("Please enter a keyword and select a sentiment")
1450
+
1451
+ # Analyze Special Cluster
1452
+ st.subheader("Analyze Special Cluster")
1453
+ if 'ba_lainnya_data' in st.session_state:
1454
+ try:
1455
+ # Load the `Special Cluster` data directly
1456
+ special_cluster_data = st.session_state['ba_lainnya_data'][st.session_state['ba_lainnya_data']['Cluster_Name'] == 'Special Cluster']
1457
+ if special_cluster_data.empty:
1458
+ st.warning("No data found in Special Cluster.")
1459
+ else:
1460
+ st.write(f"Total comments in Special Cluster: {len(special_cluster_data)}")
1461
+
1462
+ all_words_special = []
1463
+ for comment in special_cluster_data['negation_handled']:
1464
+ comment = translate_emojis(comment)
1465
+ comment = normalize_unicode(comment)
1466
+ comment = handle_replies(comment)
1467
+ comment = clean_text(comment)
1468
+ comment = translate_text(comment, {})  # NOTE: an empty dict makes this a no-op; pass ntb_dict / slang_dict here as in the other preprocessing paths
1469
+ comment = handle_negation(comment)
1470
+ words = preprocess_text(comment)
1471
+ all_words_special.extend(words)
1472
+
1473
+ # Calculate word frequencies
1474
+ word_freq_special = Counter(all_words_special)
1475
+
1476
+ # Add slider to select number of words to display
1477
+ num_words_special = st.slider("Number of words to display (Special Cluster)", min_value=5, max_value=50, value=20)
1478
+ most_common_words_special = word_freq_special.most_common(num_words_special)
1479
+
1480
+ # Display word frequencies as a table
1481
+ st.subheader(f"Top {num_words_special} Word Frequencies in Special Cluster")
1482
+ word_freq_df_special = pd.DataFrame(most_common_words_special, columns=['Word', 'Frequency'])
1483
+ st.dataframe(word_freq_df_special)
1484
+
1485
+ except Exception as e:
1486
+ st.error(f"An error occurred: {e}")
1487
+ else:
1488
+ st.warning("No 'BA Lainnya' data found. Please classify comments first.")
1489
+
1490
+ # Analyze Training Data
1491
+ st.subheader("Analyze Training Data")
1492
+ dataset_path = f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx"
1493
+ try:
1494
+ train_data = pd.read_excel(dataset_path)
1495
+ if train_data.empty:
1496
+ st.warning("Training dataset is empty.")
1497
+ else:
1498
+ all_words_train = []
1499
+ for comment in train_data['komentar'].astype(str):
1500
+ comment = translate_emojis(comment)
1501
+ comment = normalize_unicode(comment)
1502
+ comment = handle_replies(comment)
1503
+ comment = clean_text(comment)
1504
+ comment = translate_text(comment, {})  # NOTE: an empty dict makes this a no-op; pass ntb_dict / slang_dict here as in the other preprocessing paths
1505
+ comment = handle_negation(comment)
1506
+ words = preprocess_text(comment)
1507
+ all_words_train.extend(words)
1508
+
1509
+ # Calculate word frequencies
1510
+ word_freq_train = Counter(all_words_train)
1511
+
1512
+ # Add slider to select number of words to display
1513
+ num_words_train = st.slider("Number of words to display (Training Data)", min_value=5, max_value=50, value=20)
1514
+ most_common_words_train = word_freq_train.most_common(num_words_train)
1515
+
1516
+ # Display word frequencies as a table
1517
+ st.subheader(f"Top {num_words_train} Word Frequencies in Training Data")
1518
+ word_freq_df_train = pd.DataFrame(most_common_words_train, columns=['Word', 'Frequency'])
1519
+ st.dataframe(word_freq_df_train)
1520
+
1521
+ except FileNotFoundError:
1522
+ st.error(f"Training dataset for {candidate} not found.")
1523
+ except Exception as e:
1524
+ st.error(f"An error occurred: {e}")
1525
+
1526
+ # Option to export keywords
1527
+ st.subheader("Export Keywords")
1528
+ json_buffer = io.BytesIO()
1529
+ json_buffer.write(json.dumps(keyword_dict).encode('utf-8'))
1530
+ json_buffer.seek(0)
1531
+ st.download_button(
1532
+ label="Export Keywords",
1533
+ data=json_buffer,
1534
+ file_name="keywords.json",
1535
+ mime="application/json"
1536
+ )
1537
+
1538
+ # Option to import keywords
1539
+ st.subheader("Import Keywords")
1540
+ uploaded_file = st.file_uploader("Choose a JSON file", type="json")
1541
+ if uploaded_file is not None:
1542
+ imported_keywords = json.load(uploaded_file)
1543
+ keyword_dict.update(imported_keywords)
1544
+ with open('keywords.json', 'w') as f:
1545
+ json.dump(keyword_dict, f, indent=4)
1546
+ st.success("Keywords imported successfully")
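+ # Illustrative safeguard (assumption, not in the original code): the uploaded JSON could be
+ # validated before merging, e.g. that every candidate maps sentiment names to lists of keywords:
+ # if not all(isinstance(v, dict) and all(isinstance(kws, list) for kws in v.values())
+ #            for v in imported_keywords.values()):
+ #     st.error("Unexpected structure in the uploaded keywords file.")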
1547
+ pass
1548
+
best_rf_model_indah_dhamayanti_putri.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e28bb4dacc869a7b71e089118a07351a8de60175fce28b3b8e2b8c01e651ceba
3
+ size 15044729
best_rf_model_lalu_muhamad_iqbal.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d421efb1c2b5876a1dab8d8dc11a351d75064bf0c32a24ef3df3a9913670182
3
+ size 44015033
best_rf_model_m_suhaili.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:949b91ab83ead20c613ced16780bda52780ecac0d56c0f32ccec44131d44ebe2
3
+ size 13640633
best_rf_model_musyafirin.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cb226ee3fab98e8d2af12cd5329f73beb54e4b5a1fa871d01c2c2029a31d5d2
3
+ size 6092665
best_rf_model_sitti_rohmi_djalilah.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eecdcd277c204d1d771cd767169ab0f026ed8544516f7f43389aab32f0a27a6
3
+ size 24894969
best_rf_model_zulkieflimansyah.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f5d7ce2da36aadee463102fa50e42d658ac46d3a690ff81329d7a4d81956b0f
3
+ size 23188793
datasetntbnew_indah_dhamayanti_putri.xlsx ADDED
Binary file (57.4 kB). View file
 
datasetntbnew_lalu_muhamad_iqbal.xlsx ADDED
Binary file (104 kB). View file
 
datasetntbnew_m_suhaili.xlsx ADDED
Binary file (57.9 kB). View file
 
datasetntbnew_musyafirin.xlsx ADDED
Binary file (88.5 kB). View file
 
datasetntbnew_sitti_rohmi_djalilah.xlsx ADDED
Binary file (67.5 kB). View file
 
datasetntbnew_zulkieflimansyah.xlsx ADDED
Binary file (67.6 kB). View file
 
kamusalay.csv ADDED
@@ -0,0 +1,238 @@
1
+ ajh,saja
2
+ ajj,saja
3
+ akoh,aku
4
+ amaca,ah masa
5
+ amuh,kamu
6
+ aph,apa
7
+ apose,apa
8
+ apz,apa
9
+ aq,saya
10
+ baryaw,sabar ya
11
+ baryw,sabar ya
12
+ bryaw,sabar ya
13
+ bryw,sabar ya
14
+ bay,selamat tinggal
15
+ besoq,besok
16
+ beud,banget
17
+ bhay,selamat tinggal
18
+ bisya,bisa
19
+ biza,bisa
20
+ bntr,sebentar
21
+ bokap,ayah
22
+ bkap,ayah
23
+ bkp,ayah
24
+ bokaps,ayah
25
+ bokapz,ayah
26
+ bs,bisa
27
+ bsa,bisa
28
+ bsk,besok
29
+ bsoq,besok
30
+ bte,bosan
31
+ bozen,bosan
32
+ bozn,bosan
33
+ bzn,bosan
34
+ bzen,bosan
35
+ cabut,pergi
36
+ caiank,sayang
37
+ cekola,sekolah
38
+ cekolah,sekolah
39
+ celalaw,selalu
40
+ celalu,selalu
41
+ cemungudh,semangat
42
+ cemungut,semangat
43
+ cemunguth,semangat
44
+ cibuq,sibuk
45
+ cini,sini
46
+ ciyus,serius
47
+ cll,selalu
48
+ cllu,selalu
49
+ cllw,selalu
50
+ cpe,capek
51
+ cpee,capek
52
+ cewe,cewek
53
+ cwe,cewek
54
+ cowo,cowok
55
+ cwo,cowok
56
+ dah,sudah
57
+ dapa,ada apa
58
+ dapah,ada apa
59
+ dftr,daftar
60
+ dh,sudah
61
+ dimance,dimana
62
+ dimandose,dimana
63
+ dimans,dimana
64
+ duluw,dulu
65
+ ea,ya
66
+ emg,memang
67
+ eteb,bosan
68
+ g,tidak
69
+ ga,tidak
70
+ gabut,menganggur
71
+ gak,tidak
72
+ gakz,tidak
73
+ gatau,tidak tahu
74
+ gataw,tidak tahu
75
+ gengges,ganggu
76
+ ghiy,lagi
77
+ gi,lagi
78
+ gk,tidak
79
+ gpp,tidak apa apa
80
+ gtw,tidak tahu
81
+ gue,saya
82
+ gw,saya
83
+ gx,tidak
84
+ hums,rumah
85
+ humz,rumah
86
+ huum,iya
87
+ iy,iya
88
+ iyach,iya
89
+ iyap,iya
90
+ iyapz,iya
91
+ iyup,iya
92
+ iyupz,iya
93
+ iz,iya
94
+ iza,iya
95
+ izza,iya
96
+ jamber,jam berapa
97
+ jd,jadi
98
+ jdi,jadi
99
+ jg,juga
100
+ jga,juga
101
+ jgn,jangan
102
+ jngan,jangan
103
+ jngn,jangan
104
+ kacian,kasihan
105
+ kaka,kakak
106
+ kau,kamu
107
+ keles,kali
108
+ kenapah,kenapa
109
+ kenaps,kenapa
110
+ kenapz,kenapa
111
+ kepo,ingin tahu
112
+ keyen,keren
113
+ khan,kan
114
+ khanz,kan
115
+ kk,kakak
116
+ klo,kalau
117
+ klw,kalau
118
+ km,kamu
119
+ kmrn,kemarin
120
+ kmu,kamu
121
+ knp,kenapa
122
+ koq,kok
123
+ kpan,kapan
124
+ kpn,kapan
125
+ kuq,kok
126
+ kuy,ayo
127
+ kw,kamu
128
+ kzl,kesal
129
+ lam,salam
130
+ leh,boleh
131
+ lo,kamu
132
+ loe,kamu
133
+ lom,belum
134
+ low,kalau
135
+ lp,lupa
136
+ lu,kamu
137
+ luchu,lucu
138
+ lum,belum
139
+ lun,belum
140
+ luthu,lucu
141
+ lw,kamu
142
+ maacih,terima kasih
143
+ maap,maaf
144
+ mager,malas bergerak
145
+ makaci,terima kasih
146
+ maw,mau
147
+ miapa,demi apa
148
+ miapah,demi apa
149
+ misal'a,misalnya
150
+ muup,maaf
151
+ mu'uv,maaf
152
+ mw,mau
153
+ nak,anak
154
+ naq,anak
155
+ nax,anak
156
+ nda,tidak
157
+ ndak,tidak
158
+ ndax,tidak
159
+ ngabungin,menggabungkan
160
+ ngajak,mengajak
161
+ ngerokok,merokok
162
+ ngga,tidak
163
+ nggak,tidak
164
+ nggax,tidak
165
+ nggesek,menggesek
166
+ nggosok,menggosok
167
+ ngibul,berbohong
168
+ nyokap,ibu
169
+ nykap,ibu
170
+ nykaps,ibu
171
+ nykapz,ibu
172
+ nykp,ibu
173
+ nich,ini
174
+ nntn,menonton
175
+ ntn,menonton
176
+ oc,oke
177
+ oce,oke
178
+ ohh,oh
179
+ ok,oke
180
+ okedech,oke
181
+ okedeh,oke
182
+ okeh,oke
183
+ okz,oke
184
+ org,orang
185
+ ouch,oh
186
+ ouh,oh
187
+ owh,oh
188
+ pasutri,pasangan suami istri
189
+ paz,pas
190
+ pengen,ingin
191
+ pengin,ingin
192
+ pgn,ingin
193
+ psti,pasti
194
+ pzt,pasti
195
+ q,saya
196
+ qaqa,kakak
197
+ qq,kakak
198
+ rmh,rumah
199
+ sabeb,bebas
200
+ sabi,bisa
201
+ salfok,salah fokus
202
+ saltum,salah kostum
203
+ sdh,sudah
204
+ selaw,santai
205
+ selow,santai
206
+ shap,siap
207
+ shaps,siap
208
+ syipp,sip
209
+ syp,siapa
210
+ tau,tahu
211
+ tauk,tahu
212
+ tdk,tidak
213
+ telp,telepon
214
+ tgl,tanggal
215
+ thx,terima kasih
216
+ tipi,televisi
217
+ tp,tapi
218
+ tq,terima kasih
219
+ trims,terima kasih
220
+ trimz,terima kasih
221
+ tuch,itu
222
+ tw,tahu
223
+ u,kamu
224
+ u,kamu
225
+ udah,sudah
226
+ udd,sudah
227
+ udh,sudah
228
+ uga,juga
229
+ von,telepon
230
+ w,saya
231
+ wad,buat
232
+ wat,buat
233
+ yank,sayang
234
+ yap,ya
235
+ yaw,ya
236
+ yoi,iya
237
+ yups,ya
238
+ yupz,ya
keywords.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "Musyafirin": {
3
+ "Co Likes": ["keren", "bagus", "diakui", "disukai", "tegas"],
4
+ "Co Support": ["pemimpin baik", "pilihan tepat", "jujur", "adil", "kinerja baik"],
5
+ "Co Optimism": ["maju terus", "berhasil", "terdepan", "pengaruh positif", "optimis"],
6
+ "Co Negative": ["kekurangan", "buruk", "tidak peduli", "masalah", "tidak mampu"],
7
+ "Co Sarkastic": ["oh tentu", "iya benar", "seolah-olah", "oh hebat", "pasti", "benar sekali", "sangat meyakinkan", "tidak mungkin", "teruskan", "oh iya"]
8
+ },
9
+ "Sitti_Rohmi_Djalillah": {
10
+ "Co Likes": ["baik", "cantik", "inspiratif", "cerdas", "menarik"],
11
+ "Co Support": ["hebat", "terbaik", "pemimpin bijak", "solid", "juara", "unggul"],
12
+ "Co Optimism": ["masa depan cerah", "harapan", "kepercayaan", "optimis", "juara"],
13
+ "Co Negative": ["gagal", "tidak mendukung", "lemah", "tidak suka", "korupsi"],
14
+ "Co Sarkastic": ["oh tentu", "iya benar", "seolah-olah", "oh hebat", "pasti", "benar sekali", "sangat meyakinkan", "tidak mungkin", "teruskan", "oh iya"]
15
+ },
16
+ "Zulkieflimansyah": {
17
+ "Co Likes": ["inspiratif", "cerdas", "berprestasi", "bagus", "terpuji"],
18
+ "Co Support": ["terbaik", "pemimpin inspiratif", "solid", "juara", "bijaksana"],
19
+ "Co Optimism": ["optimis", "harapan", "masa depan", "kemenangan", "perubahan positif"],
20
+ "Co Negative": ["tidak berprestasi", "isu korupsi", "lemah", "tidak mendukung"],
21
+ "Co Sarkastic": ["oh tentu", "iya benar", "seolah-olah", "oh hebat", "pasti", "benar sekali", "sangat meyakinkan", "tidak mungkin", "teruskan", "oh iya"]
22
+ },
23
+ "Lalu_Muhamad_Iqbal": {
24
+ "Co Likes": ["bagus", "baik", "keren", "disukai", "cocok"],
25
+ "Co Support": ["dukung", "pilih", "mantap", "semangat", "nomor satu", "hebat"],
26
+ "Co Optimism": ["harapan", "optimis", "menang", "sukses", "terbaik", "pemimpin"],
27
+ "Co Negative": ["fitnah", "bohong", "tidak suka", "jelek", "kalah", "buruk"],
28
+ "Co Sarkastic": ["oh tentu", "iya benar", "seolah-olah", "oh hebat", "pasti", "benar sekali", "sangat meyakinkan", "tidak mungkin", "teruskan", "oh iya"]
29
+ },
30
+ "Indah_Dhamayanti_Putri": {
31
+ "Co Likes": ["bagus", "menarik", "cocok", "cantik", "baik hati"],
32
+ "Co Support": ["semangat", "mantap", "pilihan tepat", "hebat", "menang", "dukung terus"],
33
+ "Co Optimism": ["sukses", "maju", "terbaik", "inspirasi", "masa depan", "optimis"],
34
+ "Co Negative": ["isu", "korupsi", "tidak baik", "cacat", "buruk", "jelek"],
35
+ "Co Sarkastic": ["oh tentu", "iya benar", "seolah-olah", "oh hebat", "pasti", "benar sekali", "sangat meyakinkan", "tidak mungkin", "teruskan", "oh iya"]
36
+ }
37
+ }
ntb_dict.json ADDED
@@ -0,0 +1,396 @@
1
+ {
2
+ "gawe": "kerja",
3
+ "kepeng": "uang",
4
+ "mae": "datang",
5
+ "menyaman": "nyaman",
6
+ "bere": "berani",
7
+ "muter": "berjalan-jalan",
8
+ "endek": "tidak",
9
+ "lang": "belum",
10
+ "ngena": "makan",
11
+ "tongka": "pergi",
12
+ "nanem": "tanam",
13
+ "menteleng": "melihat",
14
+ "tepek": "tangan",
15
+ "dewe": "sendiri",
16
+ "sambel": "sambal",
17
+ "kene": "sini",
18
+ "bare": "baru",
19
+ "kek": "seperti",
20
+ "sedek": "sedikit",
21
+ "buin": "lagi",
22
+ "bareng": "bersama",
23
+ "beleng": "belok",
24
+ "reng": "orang",
25
+ "batur": "teman",
26
+ "lepok": "berbicara",
27
+ "gubuk": "rumah",
28
+ "lombok": "cabe",
29
+ "santun": "hormat",
30
+ "jelo": "jelek",
31
+ "susur": "bersih",
32
+ "laek": "naik",
33
+ "tembe": "kemudian",
34
+ "kereng": "keras",
35
+ "kajang": "jatuh",
36
+ "raos": "enak",
37
+ "tampah": "kotoran",
38
+ "engat": "ingat",
39
+ "ken": "kenal",
40
+ "baro": "kemarin",
41
+ "silo": "tidur",
42
+ "temek": "kecil",
43
+ "gole": "pergi",
44
+ "betuk": "buat",
45
+ "peng": "sakit",
46
+ "taman": "tambah",
47
+ "kunci": "kunci",
48
+ "sesu": "siap",
49
+ "pon": "sudah",
50
+ "kut": "kamu",
51
+ "gete": "besar",
52
+ "lingeh": "dengar",
53
+ "bueh": "jauh",
54
+ "male": "malu",
55
+ "pacong": "pelit",
56
+ "sate": "tidak ada",
57
+ "gati": "sangat",
58
+ "dase": "hidup",
59
+ "pukul": "pukul",
60
+ "rugu": "bodoh",
61
+ "tengaq": "tengah",
62
+ "juak": "jual",
63
+ "bijak": "bijak",
64
+ "seman": "sehat",
65
+ "masok": "masuk",
66
+ "lauk": "ikan",
67
+ "lengit": "hilang",
68
+ "pek": "samping",
69
+ "peteng": "gelap",
70
+ "rangkat": "angkat",
71
+ "sarak": "cepat",
72
+ "selak": "takut",
73
+ "tapok": "tutup",
74
+ "tepuk": "tangan",
75
+ "tere": "kiri",
76
+ "tuan": "tuan",
77
+ "ungak": "lompat",
78
+ "turun": "turun",
79
+ "waktu": "waktu",
80
+ "wuri": "belakang",
81
+ "yakin": "yakin",
82
+ "zaman": "zaman",
83
+ "nggawe": "sedang bekerja",
84
+ "ngena-ngena": "sedang makan",
85
+ "nanemin": "menanamkan",
86
+ "ngelingeh": "mendengarkan",
87
+ "nenga": "melihat",
88
+ "nengaq": "melihat",
89
+ "golet": "berpergian",
90
+ "lepokin": "membicarakan",
91
+ "betukin": "membuat",
92
+ "masukin": "memasukkan",
93
+ "jualin": "menjual",
94
+ "angkatin": "mengangkat",
95
+ "melangit": "melihat ke atas",
96
+ "nenggo": "menunggu",
97
+ "sedek-sedek": "sedikit-sedikit",
98
+ "bare-tek": "baru saja",
99
+ "lepok-lepok": "pembicaraan",
100
+ "dase-dase": "menghidupkan",
101
+ "paconge": "kepelitan",
102
+ "tapokin": "menutupkan",
103
+ "kerengin": "mengeraskan",
104
+ "silo-siloan": "sering tidur",
105
+ "ndek-nggawe": "tidak bekerja",
106
+ "kereng-kereng": "sangat keras",
107
+ "pacong-pacong": "sangat pelit",
108
+ "beleng-beleng": "belokan",
109
+ "tapok-tapok": "penutupan",
110
+ "kepeng-kepeng": "uang-uang",
111
+ "anake": "anaknya",
112
+ "nyong": "saya",
113
+ "ku": "aku",
114
+ "kit": "kita",
115
+ "iyong": "dia",
116
+ "nene": "mereka",
117
+ "geteh": "besar",
118
+ "alang": "tinggi",
119
+ "lendong": "lembut",
120
+ "ngele": "panas",
121
+ "se": "satu",
122
+ "due": "dua",
123
+ "telu": "tiga",
124
+ "empat": "empat",
125
+ "lima": "lima",
126
+ "enem": "enam",
127
+ "pitu": "tujuh",
128
+ "wolu": "delapan",
129
+ "sia": "sembilan",
130
+ "sepulu": "sepuluh",
131
+ "sewelas": "sebelas",
132
+ "duwelas": "dua belas",
133
+ "selikur": "dua puluh satu",
134
+ "telu likur": "dua puluh tiga",
135
+ "sekedik": "sedikit",
136
+ "bare-bare": "baru-baru",
137
+ "tembe-tembe": "nanti-nanti",
138
+ "reng-reng": "orang-orang",
139
+ "sambel-sambel": "bermacam-macam sambal",
140
+ "silo-silo": "berkali-kali tidur",
141
+ "sate-sate": "tidak ada sama sekali",
142
+ "gole-gole": "sering pergi",
143
+ "batur-batur": "teman-teman",
144
+ "ane": "saya",
145
+ "eto": "itu",
146
+ "maej": "mari",
147
+ "tangkong": "naik",
148
+ "tie": "di sana",
149
+ "skek": "sedikit",
150
+ "arik": "adik",
151
+ "tabah pribadi": "kuat secara pribadi",
152
+ "suhu": "guru",
153
+ "side": "anda",
154
+ "arak": "minuman keras",
155
+ "ruan": "ruang",
156
+ "paut": "ikat",
157
+ "jari": "jadi",
158
+ "penjuluk": "julukan",
159
+ "ndekn": "tidak (NTB)",
160
+ "care": "peduli",
161
+ "nenak": "enak",
162
+ "k'tuan": "tuan",
163
+ "ndek": "tidak",
164
+ "akak": "kakak",
165
+ "milu": "ikut",
166
+ "ust": "ustadz",
167
+ "laun": "pelan-pelan",
168
+ "mun": "kalau",
169
+ "wah": "wah",
170
+ "jadi": "jadi",
171
+ "gubernur": "gubernur",
172
+ "ja": "sudah",
173
+ "ngkah": "langkah",
174
+ "noglh": "menyusul",
175
+ "berbaur": "berbaur",
176
+ "karingan": "kering",
177
+ "aran": "nama",
178
+ "nane": "nama panggilan",
179
+ "kancen": "teman",
180
+ "nyalon": "calonkan diri",
181
+ "biase": "biasa",
182
+ "boyaq": "bohong",
183
+ "suare": "suara",
184
+ "lemaq": "bagus",
185
+ "ngengat": "memukul",
186
+ "bae": "baik",
187
+ "ndkn": "tidak (variant)",
188
+ "mle": "memulai",
189
+ "te": "ke sana",
190
+ "isik": "isi",
191
+ "sak": "sempit",
192
+ "iye": "iya",
193
+ "muk": "mulut",
194
+ "melek": "melek",
195
+ "ky": "seperti",
196
+ "kire": "kirikanan",
197
+ "jemaq": "banyak",
198
+ "seandaian": "seandainya",
199
+ "ne": "di sini",
200
+ "mele": "pergi",
201
+ "ye": "dia",
202
+ "malik": "balik",
203
+ "maraq": "semangat",
204
+ "ngini": "disini",
205
+ "perli": "sindir",
206
+ "melene": "lemah",
207
+ "ampok": "sampai",
208
+ "manto": "mantap",
209
+ "nge": "kamu",
210
+ "lalo": "pergi",
211
+ "ndk": "tidak",
212
+ "ta": "jangan",
213
+ "taok": "ke sana",
214
+ "pilen": "pemilu",
215
+ "min": "makanan ringan",
216
+ "dwg": "dengar",
217
+ "selebung": "tutup",
218
+ "enden": "endapkan",
219
+ "unin": "suara",
220
+ "mule": "pulang",
221
+ "lamun": "jika",
222
+ "ndkmn": "tidak mungkin",
223
+ "pilek": "pemilu",
224
+ "jak": "pergi",
225
+ "wayen": "waktu",
226
+ "pesilak": "minta tolong",
227
+ "balen": "kembali",
228
+ "pastin": "pastikan",
229
+ "laguk": "lagu",
230
+ "poton": "potong",
231
+ "idungm": "hidung",
232
+ "lamper": "lampirkan",
233
+ "sik": "juga",
234
+ "gemes": "tertarik",
235
+ "pete": "kacang panjang",
236
+ "yg": "yang",
237
+ "geratis": "gratis",
238
+ "melak": "melakukan",
239
+ "wahm": "wah",
240
+ "abotk": "berat",
241
+ "eak": "iya",
242
+ "belecok": "berbelok",
243
+ "mauk": "masuk",
244
+ "bdoe": "bodoh",
245
+ "mesak": "merasa",
246
+ "kentok": "kena",
247
+ "nani": "nanti",
248
+ "melen": "mendengar",
249
+ "besile": "berita",
250
+ "kance": "teman",
251
+ "gub": "daerah",
252
+ "bedengah": "tengah",
253
+ "lirimn": "lihat",
254
+ "wea": "anda",
255
+ "adoo": "ada",
256
+ "tenak": "makan",
257
+ "tye": "siapa",
258
+ "juluk": "julukan",
259
+ "peneng": "tenang",
260
+ "ampureeee": "maafkan",
261
+ "eku": "aku",
262
+ "loq": "siapa",
263
+ "maukn": "mau",
264
+ "angen": "bisa",
265
+ "kake": "takut",
266
+ "seragem": "seragam",
267
+ "senu": "biasa",
268
+ "keruan": "sangat",
269
+ "tepileq": "bisa",
270
+ "taon": "tahun",
271
+ "man": "saya",
272
+ "dait": "kait",
273
+ "sengak": "pintar",
274
+ "uah": "wah",
275
+ "surukm": "suruh",
276
+ "lasing": "berlaku",
277
+ "komenank": "komentar",
278
+ "jage": "jaga",
279
+ "melem": "makan",
280
+ "mako": "maaf",
281
+ "pileklah": "sudah",
282
+ "sdh": "sudah",
283
+ "permakoan": "pergaulan",
284
+ "ape": "apa",
285
+ "ite": "itu",
286
+ "jakm": "jaket",
287
+ "sai": "saya",
288
+ "maseh": "masih",
289
+ "maukm": "mau",
290
+ "timak": "ambil",
291
+ "auk": "satu",
292
+ "an": "saya",
293
+ "tadahn": "menangkap",
294
+ "kenak": "kena",
295
+ "berugak": "berdiri",
296
+ "elen": "lihat",
297
+ "setil": "segala",
298
+ "heh": "hei",
299
+ "kanatooo": "kenapa",
300
+ "made": "sudah",
301
+ "mpoipu": "mencari",
302
+ "panjamba": "panjang",
303
+ "ncau": "cau",
304
+ "rew": "redha",
305
+ "ur": "mau",
306
+ "karukumi": "berkurang",
307
+ "lokina": "di sini",
308
+ "wara": "uang",
309
+ "tanda-tanda": "tanda",
310
+ "ompu": "panggil",
311
+ "suki": "suka",
312
+ "doho": "bisa",
313
+ "ede": "di",
314
+ "na": "ada",
315
+ "noro": "apa",
316
+ "weaku": "aku",
317
+ "ragam": "beragam",
318
+ "ndi": "itu",
319
+ "aumu": "saya",
320
+ "ba": "baik",
321
+ "ma": "ya",
322
+ "meta": "mata",
323
+ "de": "di",
324
+ "bolpoin": "pulpen",
325
+ "wa": "wah",
326
+ "mpoi": "sampai",
327
+ "ba loan": "tidak ada",
328
+ "dahu": "kebun",
329
+ "k ntuwu": "kuat",
330
+ "weki": "hai",
331
+ "dou doho": "sangat",
332
+ "ringu": "melihat",
333
+ "aka": "sebutan",
334
+ "ncau re": "cau",
335
+ "ina": "ibu",
336
+ "mpanga": "mendengar",
337
+ "au": "saya",
338
+ "baba": "ayah",
339
+ "pala": "kepala",
340
+ "ngahi": "indah",
341
+ "hafa": "terus",
342
+ "karaka": "gampang",
343
+ "podaku": "saya",
344
+ "ne'e": "disini",
345
+ "wati": "perempuan",
346
+ "dahuna": "ada",
347
+ "loko": "tangan",
348
+ "ro": "sana",
349
+ "waura": "tempat",
350
+ "mbuku": "buku",
351
+ "konee": "kamu",
352
+ "matundu": "kebun",
353
+ "piti": "kecil",
354
+ "mudh": "mudah",
355
+ "progrm": "program",
356
+ "kturunanx": "turunan",
357
+ "ndiha": "disana",
358
+ "ece": "anak",
359
+ "kamanae": "kemana",
360
+ "ngomi": "ngomong",
361
+ "malao": "berlari",
362
+ "ipi": "ujung",
363
+ "sangufi": "bisa",
364
+ "hambu": "sangat",
365
+ "hondo": "berasa",
366
+ "langgengkan": "terus",
367
+ "jelung": "terkenal",
368
+ "kece": "keren",
369
+ "nggih": "ya",
370
+ "mlang": "jalan",
371
+ "tepung": "ketemu",
372
+ "ketok": "kelihatan",
373
+ "tamba": "obat",
374
+ "tulung": "tolong",
375
+ "wet": "air",
376
+ "ndemek": "menyentuh",
377
+ "nyandak": "mengambil",
378
+ "mbet": "memeluk",
379
+ "tepe": "mendorong",
380
+ "kliru": "salah",
381
+ "luweh": "lebih",
382
+ "akeh": "banyak",
383
+ "cemeng": "hitam",
384
+ "abang": "merah",
385
+ "jembar": "luas",
386
+ "ngombe": "minum",
387
+ "nyonggo": "membawa",
388
+ "nyilih": "meminjam",
389
+ "krempyeng-krempyeng": "sedikit demi sedikit",
390
+ "tekuk-tekuk": "membungkuk-bungkuk",
391
+ "sampeyan": "kamu (halus)",
392
+ "awak": "badan",
393
+ "satus": "seratus",
394
+ "sewu": "seribu",
395
+ "sejuta": "sejuta"
396
+ }
tfidf_vectorizer_indah_dhamayanti_putri.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:928fa30e9c66bd841663f7cef0c25adbbce5e51031219779a226eae424b63783
3
+ size 24377
tfidf_vectorizer_lalu_muhamad_iqbal.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86d123b6d2e126182603c7727ac6d9afa98b5598e4f92d5053070d86f3090ae7
3
+ size 68943
tfidf_vectorizer_m_suhaili.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ccdd6525e6fb4b9e0e2ea42734e4b8945ab7cc2f38a708cf6afab3bd0272c4
3
+ size 36399
tfidf_vectorizer_musyafirin.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93d68cfb116d6688f4b797bb1d46e37701d66102cefdaaf9bcb9c24f737402ae
3
+ size 41568
tfidf_vectorizer_sitti_rohmi_djalilah.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90e500ec5c0ec3d1982e0a4fa33df38d8f34449cab9d1e4d89d267a83a546cae
3
+ size 55027
tfidf_vectorizer_zulkieflimansyah.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5036b20528b2b6b9cd9651573d564d219292d07c1f0817a90fd761410e42ed6
3
+ size 48165