ahmadardhy committed on
Commit 40da6f6 · verified · 1 Parent(s): 2b8eb9d

Update app.py

Files changed (1): app.py +783 -314
app.py CHANGED
@@ -10,26 +10,14 @@ from collections import Counter
  import tensorflow as tf
  from transformers import TFBertForSequenceClassification, BertTokenizer
  from sklearn.feature_extraction.text import TfidfVectorizer
- from sklearn.metrics.pairwise import cosine_similarity
  from sklearn.model_selection import train_test_split

  # Load the dictionary data
  df_kamus_komen1 = pd.read_excel('data_komen_mundjidah_clean.xlsx')  # Dictionary 1
- df_kamus_komen3 = pd.read_excel('data_komen_warsubi_clean-v1.xlsx')  # Dictionary 3
-
- # Lists of negative and positive keywords
- negative_keywords_model1 = ["pilih nomor dua", "nomor dua", "buruk", "jelek", "✌️", "dua", "jalan rusak", "leren", "perubahan", "ganti bupati", "warsa", "abah", "janji manis", "omong tok", "nyocot", "bacot"]
- negative_keywords_model2 = ["pilih nomor satu", "nomor satu", "buruk", "jelek", "☝️"]
- negative_keywords_model3 = ["buruk", "jelek", "☝️", "golput", "serang", "mundjidah", "janji manis", "omong tok", "nyocot", "bacot", "carmuk", "cari muka"]
-
- positive_keywords_model1 = ["semoga menang", "semoga", "baik", "bagus", "terbaik", "semangat", "mundjidah", "amin", "gas"]
- positive_keywords_model2 = ["hebat", "luar biasa", "bagus", "terbaik", "memilih dengan tepat", "all in abah subi", "pilih warsubi"]
- positive_keywords_model3 = ["hebat", "luar biasa", "bagus", "terbaik", "memilih dengan tepat", "all in abah subi", "pilih warsubi", "coblos", "dukung", "pilih", "semangat", "allahuakbar", "subhanallah", "gus kautsar", "pemimpin", "gus", "pendherek",
-     "salam dua jari", "pemimpin baru", "alhamdulillah", "salam", "sowan", "waalaikumsalam", "tambah maju", "tambah sejahtera", "makin maju", "makin sejahtera", "makin apik", "hadir", "sip", "jos", "mantap bah",
-     "warsa", "warsubi", "warsa bupatiku", "setuju", "dukung abah", "abah", "dua", "nomor dua", "amin", "gas", "ayo dukung", "warsubi tok", "semoga menang", "warsa ae", "warsa ae liane up", "tiang sae", "bantu", "beri", "kasih",
-     "selamat", "pasti menang", "assalamualaikum", "unggul", "telak", "perubahan", "semoga", "warga sejahtera", "semakin sejahtera", "tambah apik", "ganti bupati", "ngayomi", "alhamdulillah", "barokalloh", "pilih abah", "pilih warsa",
-     "aamiin", "bismilah", "pasti menang", "bismillah", "aamiin", "calon pemimpin", "dukung abah subi", "alhamdulillah", "masyaallah", "mashaallah", "menang", "pemimpin", "warsah", "lanjutkan abah", "lanjutkan"
-     "semangat", "optimis", "semoga", "yakin", "amanah", "mantap", "mantab", "komitmen", "mengayomi", "merangkul", "bupati", "calon bupati", "bupati", "bukan pencitraan", "dermawan", "bantuan", "no dua", "no ✌️"]

  # Function to load the normalization dictionary from a local file
  def load_normalization_dict(file_path):
@@ -77,34 +65,192 @@ def remove_usernames(comment, usernames):
      pattern = rf'\b{re.escape(username)}\b'
      comment = re.sub(pattern, '', comment, flags=re.IGNORECASE)
      return re.sub(r'\s+', ' ', comment.strip())
-
  # Function to clean the text
  def clean_text(text):
      text = str(text)
-     text = re.sub(r'http[s]?://\S+', '', text)
-     text = re.sub(r'@\w+|#\w+', '', text)
      text = re.sub(r'\b(01|1)\b', 'satu', text)
      text = re.sub(r'\b(02|2)\b', 'dua', text)
      text = re.sub(r'\b\d+\b', '', text)
-     text = text.lower().strip()
-     return re.sub(r'[^a-zA-Z0-9✌️☝️ ]', '', text)

- # Function to update the dictionary
- def update_kamus(file_path, new_data):
      try:
-         existing_data = pd.read_excel(file_path)  # Load the existing dictionary data
-         combined_data = pd.concat([existing_data, new_data]).drop_duplicates(subset=["Comment"])  # Avoid duplicates
-         combined_data.to_excel(file_path, index=False)  # Save back to the file
-         st.success(f"Kamus berhasil diperbarui dengan {len(new_data)} data baru.")
      except Exception as e:
-         st.error(f"Gagal memperbarui kamus: {e}")

  # Add the menu options in the sidebar
- menu = st.sidebar.selectbox("Pilih Menu", ["Klasifikasi Sentimen", "Editor Kamus"])

- if menu == "Klasifikasi Sentimen":
      # Streamlit app
      st.title("Aplikasi Klasifikasi Sentimen dan Brand Attitude")

@@ -121,6 +267,8 @@ if menu == "Klasifikasi Sentimen":
          data = pd.read_excel(uploaded_file)
      elif uploaded_file.name.endswith('.csv'):
          data = pd.read_csv(uploaded_file)

      # Clean the data
      data.dropna(how='all', inplace=True)
@@ -131,28 +279,36 @@ if menu == "Klasifikasi Sentimen":
      known_usernames = get_known_usernames(data)
      data["Cleaned_Text"] = data["Comment"].apply(lambda x: remove_usernames(x, known_usernames))
      data["Cleaned_Text"] = data["Cleaned_Text"].apply(lambda x: normalize_text(clean_text(x), normalization_dict))
-
      # Configure the model based on the selected option
      if model_choice == "Model Mundjidah":
          sentiment_model_path = "mundjidah-model.h5"
          ba_model_path = "ba-mundjidah-model.h5"
-         positive_keywords = ["semoga menang", "semoga", "baik", "bagus", "terbaik", "semangat", "mundjidah", "amin", "gas"]
-         negative_keywords = ["pilih nomor dua", "nomor dua", "buruk", "jelek", "✌️", "dua", "jalan rusak", "dalan rusak", "leren", "perubahan", "ganti bupati"]
-
      elif model_choice == "Model Warsubi V1":
          sentiment_model_path = "warsa-model.h5"
          ba_model_path = "ba-warsa-model.h5"
-         positive_keywords = ["hebat", "luar biasa", "bagus", "terbaik", "memilih dengan tepat", "all in abah subi", "pilih warsubi", "dua", "✌️", "abah", "sae", "sehat", "semangat", "podo"]
-         negative_keywords = ["pilih nomor satu", "nomor satu", "buruk", "jelek", "☝️", "golput ae", "serang", "semaput", "janji manis", "nyocot", "bacot", "carmuk", "spg", "bojo loro", "gak entuk", "perangkat desa",
-             "tidak onok", "tidak diwei", "perangkat", "tidak ada", "rung oleh", "gong enek", "belum dapet", "belum dapat"]
-
      else:  # Fallback for the other model
          sentiment_model_path = "warsubi-v2-model.h5"
          ba_model_path = "ba-warsubi-v2-model.h5"
-         positive_keywords = ["hebat", "luar biasa", "bagus", "terbaik", "coblos", "dukung", "pilih", "semangat"]
-         negative_keywords = ["golput ae", "serang", "mundjidah", "janji manis", "nyocot", "bacot", "carmuk"]

      PRE_TRAINED_MODEL = 'indobenchmark/indobert-base-p2'

      # Load the sentiment model
      try:
          sentiment_model = TFBertForSequenceClassification.from_pretrained(PRE_TRAINED_MODEL, num_labels=3)
@@ -164,10 +320,9 @@ if menu == "Klasifikasi Sentimen":

      # Sentiment prediction function with additional keyword matching
      def predict_with_sentiment_model(text):
-         # Keyword matching
          if any(keyword.lower() in text.lower() for keyword in positive_keywords):
              return 'positive'
-         elif any(keyword.lower() in text.lower() for keyword in negative_keywords):
              return 'negative'

          # Predict with the model if no keyword matches
@@ -186,49 +341,37 @@ if menu == "Klasifikasi Sentimen":
      except Exception as e:
          st.error(f"Gagal memuat model Brand Attitude: {e}")
          st.stop()
-
-     # Keyword list for each category
-     keywords = {
-         "Co-Optimism": ["semoga sehat selalu", "semoga sukses", "lanjutkan", "semangat", "sehat", "setuju", "ayo", "selamat", "sukses",
-             "semoga", "berharap", "mugo", "lebih maju", "optimis jombang satu", "bangga", "saget", "doa", "tambah maju",
-             "lebih maju", "tambah makmur", "tambah sejahtera", "majukan", "harap", "berharap", "menginginkan", "ingin",
-             "mendoakan", "sae bah", "bismilah", "cocok", "umkm maju", "butuh perubahan", "butuh ganti bupati", "memakmurkan",
-             "makmur", "buka lapangan kerja", "lancar", "lancar terus", "mugi", "bantuan", "sembako", "lebih baik", "tambah apik",
-             "sae", "tambah sae", "jombang maju bersama warsa", "jombang maju", "sejahtera", "yakin", "makin",
-             "optimis", "salam", "jombang sejahtera", "tambah sejahtera", "butuh pemimpin", "bismillah", "warsa menang",
-             "menanti pemimpin", "bakalan maju", "bakalan sejahtera", "bakalan sukses", "yakin", "majukan", "majulah", "doakan"],
-
-         "Co-Support": ["siap dukung", "all in", "menyala", "siap", "dukung", "gas", "warsa", "menang", "coblos", "coblos dua",
-             "ayo", "pilih dua", "pilih", "wonge abah", "warsubi tok", "merangkul", "program", "konkrit", "wong apik",
-             "baik", "niat apik", "merakyat", "mengayomi", "komitmen", "merangkul", "mendengar", "dengar", "panggah abah",
-             "panggah warsa", "antusias", "komitmen", "kebersamaan", "dukung abah", "dengan abah", "program konkrit", "abah satu",
-             "jombang satu", "orang baik", "pilih abah", "pilih warsa", "wonge abah", "ngopeni ngayomi mumpuni", "melu",
-             "tambah adem", "tambah sejuk", "dukung usaha", "no dua", "dukung umkm", "dukung ekonomi", "pendherek", "penderek",
-             "pengikut", "bismilah abah", "abah dua", "hadir support", "nggih", "turun tangan", "membantu", "bertindak",
-             "melaju", "program", "membantu", "bupati", "joss", "top", "jombang maju", "wayae", "wayahe", "maju", "mantap",
-             "abah", "bah", "ganti bupati", "sodaqoh", "wayahe ganti", "ganti", "meledak", "menyala", "dibutuhkan", "kawal",
-             "membara", "seru", "keren", "mantap", "istimewa", "ayo", "layak", "al in", "makin raket", "kerja nyata",
-             "selalu dihati", "pangah abah", "pangah warsa", "kebersaman", "dermawan", "sat set", "wat wet", "panggah abah",
-             "panggah warsa", "pangah warsa", "pangah", "wonge abah", "positif menang", "pemimpin", "wong mu"]
-     }

-     # BA prediction function with additional keyword matching
-     def predict_ba_with_model(text):
-         # Check whether the text contains keywords from the Co-Support or Co-Optimism category
          for label, keywords_list in keywords.items():
              if any(keyword.lower() in text.lower() for keyword in keywords_list):
-                 return label  # If a keyword matches, return the corresponding label

          # If no keyword matches, use the model for prediction
          inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True, max_length=128)
          outputs = ba_model(inputs)
          logits = outputs.logits
-         predicted_label = tf.argmax(logits, axis=1).numpy()[0]
-         return ['Co-Likes', 'Co-Support', 'Co-Optimism', 'Co-Negative'][predicted_label]

-     data['Brand_Attitude'] = data['Cleaned_Text'].apply(predict_ba_with_model)

      # Set "Co-Negative" when Sentimen_Prediksi is "negative"
      data['Brand_Attitude'] = data.apply(
          lambda row: "Co-Negative" if row['Sentimen_Prediksi'] == 'negative' else row['Brand_Attitude'], axis=1
@@ -238,273 +381,599 @@ if menu == "Klasifikasi Sentimen":
      data['Brand_Attitude'] = data.apply(
          lambda row: "Co-Likes" if row['Sentimen_Prediksi'] != 'negative' and row['Brand_Attitude'] == 'Co-Negative' else row['Brand_Attitude'], axis=1
      )
-
-     # Display the results
-     st.write("Hasil Klasifikasi Sentimen:")
-     st.dataframe(data[['Comment', 'Cleaned_Text', 'Sentimen_Prediksi', 'Brand_Attitude']])
-
-     # Sentiment distribution
-     sentiment_counts = data['Sentimen_Prediksi'].value_counts()
-     # Count the totals per sentiment
-     total_positive = sentiment_counts.get('positive', 0)
-     total_negative = sentiment_counts.get('negative', 0)
-     total_neutral = sentiment_counts.get('neutral', 0)
-
-     # Display the sentiment totals
-     st.write(f"**Total Sentimen Positif:** {total_positive}")
-     st.write(f"**Total Sentimen Negatif:** {total_negative}")
-     st.write(f"**Total Sentimen Netral:** {total_neutral}")
-
-     # Comment-level distribution
-     st.write("Distribusi Level Komentar:")
-     level_counts = data['Brand_Attitude'].value_counts()
-     total_co_likes = level_counts.get('Co-Likes', 0)
-     total_co_support = level_counts.get('Co-Support', 0)
-     total_co_optimism = level_counts.get('Co-Optimism', 0)
-     total_co_negative = level_counts.get('Co-Negative', 0)
-
-     # Display the Brand Attitude totals
-     st.write(f"**Total BA Co-Likes:** {total_co_likes}")
-     st.write(f"**Total BA Co-Support:** {total_co_support}")
-     st.write(f"**Total BA Co-Optimism:** {total_co_optimism}")
-     st.write(f"**Total BA Co-Negative:** {total_co_negative}")
-
-     # Visualize the sentiment distribution
-     fig, ax = plt.subplots()
-     sentiment_counts.plot(kind='bar', ax=ax, color='skyblue')
-     ax.set_title("Distribusi Sentimen")
-     ax.set_xlabel("Sentimen")
-     ax.set_ylabel("Jumlah Komentar")
-     st.pyplot(fig)
-
-     # Display the count for each category
-     st.bar_chart(level_counts)
-
-     def generate_wordcloud(text):
-         wordcloud = WordCloud(
-             width=800,
-             height=400,
-             background_color='white',
-             max_words=200,
-             colormap='viridis'
-         ).generate(text)
-         fig, ax = plt.subplots(figsize=(10, 5))
-         ax.imshow(wordcloud, interpolation='bilinear')
-         ax.axis('off')
-         return fig
-
-     st.write("WordCloud Berdasarkan Sentimen:")
-     for sentiment in ['positive', 'negative', 'neutral']:
-         text = " ".join(data[data['Sentimen_Prediksi'] == sentiment]['Cleaned_Text'].tolist())
-         if text:
-             st.write(f"WordCloud untuk Sentimen {sentiment.capitalize()}:")
-             st.pyplot(generate_wordcloud(text))
-
-     # Display the sentences grouped by sentiment
-     st.write("Kalimat Berdasarkan Sentimen:")
-     st.write("### Kalimat Positif")
-     st.write(data[data['Sentimen_Prediksi'] == 'positive']['Comment'].tolist())
-
-     st.write("### Kalimat Negatif")
-     st.write(data[data['Sentimen_Prediksi'] == 'negative']['Comment'].tolist())
-
-     st.write("### Kalimat Netral")
-     st.write(data[data['Sentimen_Prediksi'] == 'neutral']['Comment'].tolist())
-
-     # Function to tokenize text
-     def tokenize_text(text):
-         """Cleans the text and splits it into words."""
-         # Remove punctuation, convert to lowercase, and split
-         words = text.lower().replace('.', '').replace(',', '').split()
-         return words
-
-     # Function to count word frequencies
-     def get_word_frequencies(data, column):
-         """Counts word frequencies for a given text column."""
-         all_words = []
-         for text in data[column]:
-             all_words.extend(tokenize_text(text))
-         return Counter(all_words)
-
-     # Filter the data by category
-     neutral_data = data[data['Sentimen_Prediksi'] == 'neutral']
-     co_likes_data = data[data['Brand_Attitude'] == 'Co-Likes']
-
-     # Count word frequencies for each category
-     neutral_word_counts = get_word_frequencies(neutral_data, 'Cleaned_Text')
-     co_likes_word_counts = get_word_frequencies(co_likes_data, 'Cleaned_Text')
-
-     # Chart for the top words in neutral sentiment
-     st.write("### Top Kata di Sentimen Neutral")
-     neutral_most_common = neutral_word_counts.most_common(10)
-     neutral_words, neutral_counts = zip(*neutral_most_common)
      plt.figure(figsize=(10, 6))
-     plt.barh(neutral_words, neutral_counts, color='skyblue')
      plt.xlabel('Frequency')
      plt.ylabel('Words')
-     plt.title('Top Words in Neutral Sentiment')
      plt.gca().invert_yaxis()
      st.pyplot(plt)
-
-     # Chart for the top words in Co-Likes
-     st.write("### Top Kata di BA Co-Likes")
-     co_likes_most_common = co_likes_word_counts.most_common(10)
-     co_likes_words, co_likes_counts = zip(*co_likes_most_common)
-
      plt.figure(figsize=(10, 6))
-     plt.barh(co_likes_words, co_likes_counts, color='lightgreen')
      plt.xlabel('Frequency')
      plt.ylabel('Words')
-     plt.title('Top Words in Co-Likes Category')
      plt.gca().invert_yaxis()
      st.pyplot(plt)
-
-     # Prepare the data to be updated
-     new_data = data[['Comment', 'Cleaned_Text', 'Sentimen_Prediksi']].copy()
-     new_data.rename(columns={'Sentimen_Prediksi': 'Sentimen_Aktual'}, inplace=True)
-
-     # Function to find similar comments
-     def find_similar_comments(data, query_text, top_n=5):
-         # Build the TF-IDF representation of the text
-         vectorizer = TfidfVectorizer(stop_words='english')
-         tfidf_matrix = vectorizer.fit_transform(data['Cleaned_Text'])
-
-         # Transform the query
-         query_tfidf = vectorizer.transform([query_text])
-
-         # Compute the cosine similarity
-         similarity_scores = cosine_similarity(query_tfidf, tfidf_matrix)
-
-         # Add the similarity to the dataframe
-         data['similarity'] = similarity_scores[0]
-
-         # Sort by highest similarity
-         similar_comments = data.sort_values(by='similarity', ascending=False).head(top_n)
-
-         return similar_comments
-
-     # Display the similar comments
-     st.write("Komentar yang Mirip dengan Sentimen yang Akan Diperbarui")
-     similar_comments = find_similar_comments(data, "Komentar yang ingin diubah sentimennya", top_n=5)
-     st.dataframe(similar_comments[['Comment', 'Cleaned_Text', 'Sentimen_Prediksi', 'similarity']])
-
-     # Display the inputs for changing the sentiment and brand attitude
-     new_sentiment = st.selectbox("Pilih Sentimen Baru", ['positive', 'negative', 'neutral'])
-     new_brand_attitude = st.selectbox("Pilih Brand Attitude Baru", ['Co-Likes', 'Co-Support', 'Co-Optimism', 'Co-Negative'])
-
-     # Button to update the sentiment and brand attitude
-     if st.button("Perbarui Sentimen dan Brand Attitude"):
-         updated_comments = similar_comments.copy()
-         updated_comments['Sentimen_Aktual'] = new_sentiment
-         updated_comments['Brand_Attitude'] = new_brand_attitude
-
-         # Update the data in the database or dataframe
-         # For example, when the data is stored in the DataFrame `data`
-         for index, row in updated_comments.iterrows():
-             data.loc[data['Cleaned_Text'] == row['Cleaned_Text'], 'Sentimen_Aktual'] = row['Sentimen_Aktual']
-             data.loc[data['Cleaned_Text'] == row['Cleaned_Text'], 'Brand_Attitude'] = row['Brand_Attitude']
-
-         st.success("Sentimen dan Brand Attitude berhasil diperbarui!")
-
-     # # Save each row into the database
-     # for index, row in new_data.iterrows():
-     #     comment = row['Comment']
-     #     cleaned_text = row['Cleaned_Text']
-     #     sentimen_aktual = row['Sentimen_Aktual']
-
-     # # Add a button to update the dictionary
-     # if st.button("Perbarui Kamus"):
-     #     new_data = data[['Comment', 'Cleaned_Text', 'Sentimen_Prediksi']].copy()
-     #     new_data.rename(columns={'Sentimen_Prediksi': 'Sentimen_Aktual'}, inplace=True)
-     #     update_kamus(selected_file, new_data)
-
- except Exception as e:
-     st.error(f"Terjadi kesalahan: {e}")

- # Define the hyperparameters
- PRE_TRAINED_MODEL = 'indobenchmark/indobert-base-p2'
- EPOCHS = 5
- BATCH_SIZE = 32
- LEARNING_RATE = 1e-5

- # Function to retrain the model
- def retrain_model(kamus_data):
-     # Prepare the data
-     X = kamus_data['Cleaned_Text']
-     y = kamus_data['Sentimen_Aktual']

-     # Replace the labels manually (without LabelEncoder)
-     y = y.apply(lambda label: 0 if label == 'negative' else (1 if label == 'positive' else 2))

-     # Split the data into training and testing
-     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

-     # Tokenization and padding (BERT tokenizer)
-     tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL)
-     X_train_tokens = tokenizer(list(X_train), padding=True, truncation=True, max_length=128, return_tensors='tf')
-     X_test_tokens = tokenizer(list(X_test), padding=True, truncation=True, max_length=128, return_tensors='tf')

-     model_path = ''
-     if kamus_data == "data_komen_mundjidah_clean.xlsx":
-         model_path = 'update_mundjidah-model.h5'
-     elif kamus_data == "data_komen_warsubi_clean-v1.xlsx":
-         model_path = 'update_warsubi-model.h5'

-     # Load the BERT model for sequence classification
-     bert_model = TFBertForSequenceClassification.from_pretrained(PRE_TRAINED_MODEL, num_labels=3)

-     # Define the optimizer and loss function
-     optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
-     loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
-     metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')

-     # Compile the model
-     bert_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

-     # Train the model
-     bert_model.fit(
-         X_train_tokens['input_ids'], y_train,
-         epochs=EPOCHS,
-         batch_size=BATCH_SIZE,
-         validation_data=(X_test_tokens['input_ids'], y_test)
-     )

-     # Save the trained model
-     bert_model.save(model_path)

-     st.success("Model berhasil dilatih ulang dan disimpan!")

- if menu == "Editor Kamus":
-     st.title("Editor Kamus")
      kamus_option = st.selectbox(
          "Pilih Kamus yang Ingin Diedit:",
          ["data_komen_mundjidah_clean.xlsx", "data_komen_warsubi_clean-v1.xlsx"]
      )

-     # Validate the selected dictionary
-     if kamus_option in ["data_komen_mundjidah_clean.xlsx", "data_komen_warsubi_clean-v1.xlsx"]:
-         # Load the dictionary file from Excel
-         try:
-             kamus_data = pd.read_excel(kamus_option)
-
-             st.write("Kamus Saat Ini:")
-             # Display an editable table
-             edited_data = st.data_editor(
-                 kamus_data,
-                 use_container_width=True,
-                 height=500
-             )
-
-             # Button to save the changes
-             if st.button("Simpan Perubahan"):
-                 edited_data.to_excel(kamus_option, index=False)
-                 st.success("Perubahan berhasil disimpan ke file Excel!")

-             # Button to retrain the model
-             if st.button("Retrain Model"):
-                 retrain_model(kamus_data)

-         except Exception as e:
-             st.error(f"Terjadi kesalahan saat memuat atau menyimpan kamus: {e}")
 
  import tensorflow as tf
  from transformers import TFBertForSequenceClassification, BertTokenizer
  from sklearn.feature_extraction.text import TfidfVectorizer
  from sklearn.model_selection import train_test_split
+ import unicodedata
+ from sklearn.cluster import KMeans
+ import datetime

  # Load the dictionary data
  df_kamus_komen1 = pd.read_excel('data_komen_mundjidah_clean.xlsx')  # Dictionary 1
+ df_kamus_komen2 = pd.read_excel('data_komen_warsubi_clean-v1.xlsx')  # Dictionary 2

  # Function to load the normalization dictionary from a local file
  def load_normalization_dict(file_path):

      pattern = rf'\b{re.escape(username)}\b'
      comment = re.sub(pattern, '', comment, flags=re.IGNORECASE)
      return re.sub(r'\s+', ' ', comment.strip())
+
  # Function to clean the text
  def clean_text(text):
      text = str(text)
+
+     # Remove URLs, mentions, and hashtags
+     text = re.sub(r'http[s]?://\S+', '', text)  # Remove URLs
+     text = re.sub(r'@\w+|#\w+', '', text)  # Remove mentions and hashtags
+
+     # Replace specific numbers with words
      text = re.sub(r'\b(01|1)\b', 'satu', text)
      text = re.sub(r'\b(02|2)\b', 'dua', text)
+
+     # Remove the remaining numbers
      text = re.sub(r'\b\d+\b', '', text)
+
+     # Convert mathematical or bold characters into normal characters
+     text = unicodedata.normalize('NFKD', text)  # Character normalization
+
+     # Replace certain punctuation (.,!?;:) and specific emoji with a space (' ')
+     text = re.sub(r'[.,!?;:]', ' ', text)  # Replace the listed punctuation with a space
+     text = re.sub(r'[🔥✨❤️]', ' ', text)  # Replace specific emoji with a space
+
+     # Remove unwanted characters except letters, numbers, and the emoji ✌️ and ☝️
+     text = re.sub(r'[^\w\s\u2700-\u27BF\u2B50\u00A9\u00AE✌️☝️]', '', text)
+
+     # Lowercase the text and remove extra whitespace
+     text = text.lower()
+     text = re.sub(r'\s+', ' ', text).strip()  # Remove excess whitespace

+     return text

+ def load_slang_dict(file_path):
      try:
+         with open(file_path, 'r', encoding='utf-8') as file:
+             lines = file.readlines()
+         slang_dict = {}
+         for line in lines:
+             line = line.strip()
+             if ':' in line:  # Make sure the line has the key:value format
+                 key, value = line.split(':', 1)  # Split on ':'
+                 key = key.strip('"').strip()  # Remove the quotes around the key and extra whitespace
+                 value = value.strip('",').strip()  # Remove the quotes and trailing comma around the value
+                 slang_dict[key] = value
+         return slang_dict
+     except Exception as e:
+         st.error(f"Terjadi kesalahan saat membaca file slang.txt: {e}")
+         return {}
+
+ # Load the normalization dictionary from a local file
+ normalization_file = "slang.txt"
+ normalization_dict = load_normalization_dict(normalization_file)
+
+ def save_slang_dict(slang_dict, file_path):
+     try:
+         with open(file_path, 'w', encoding='utf-8') as file:
+             for key, value in slang_dict.items():
+                 # Write each key-value pair in the format "key":"value"
+                 file.write(f'"{key}":"{value}",\n')
+         st.success("Kamus normalisasi berhasil disimpan!")
      except Exception as e:
+         st.error(f"Terjadi kesalahan saat menyimpan file slang.txt: {e}")
+
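Taken together, load_slang_dict and save_slang_dict imply a one-pair-per-line layout for slang.txt: each line is split on the first ':', and the surrounding quotes and trailing comma are stripped. A minimal sketch of the assumed file contents (the entries below are illustrative, not taken from the repository):

    "yg":"yang",
    "gpp":"tidak apa-apa",
    "bgt":"banget",

Because the parser only requires a ':' somewhere on the line, hand-edited files that follow that loose key:value convention load just as well as files written by save_slang_dict.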
+ def load_keywords(file_path):
+     """Reads the keywords from a txt file with a category-based format."""
+     keywords = {}
+     with open(file_path, 'r', encoding='utf-8') as f:
+         current_category = None
+         for line in f:
+             line = line.strip()
+             if re.match(r'^\[.*\]$', line):  # Detect a category such as [Co-Optimism]
+                 current_category = line.strip('[]')
+                 keywords[current_category] = []
+             elif current_category and line:
+                 keywords[current_category].append(line)
+     return keywords
+
+ def load_negative_keywords(file_path):
+     """Reads the negative keywords with a model identifier."""
+     negative_keywords = {}
+     with open(file_path, 'r', encoding='utf-8') as f:
+         current_model = None
+         for line in f:
+             line = line.strip()
+             if re.match(r'^\[.*\]$', line):  # Detect a model identifier such as [Model Mundjidah]
+                 current_model = line.strip('[]')
+                 negative_keywords[current_model] = []
+             elif current_model and line:
+                 negative_keywords[current_model].append(line)
+     return negative_keywords
+
+ def save_keywords(file_path, keywords):
+     """Saves the keywords to a txt file."""
+     with open(file_path, 'w', encoding='utf-8') as f:
+         for category, words in keywords.items():
+             f.write(f"[{category}]\n")
+             for word in words:
+                 f.write(f"{word}\n")
+             f.write("\n")  # Add a blank line between categories
+
+ def save_negative_keywords(file_path, negative_keywords):
+     """Saves the negative keywords to a txt file."""
+     with open(file_path, 'w', encoding='utf-8') as f:
+         for model, words in negative_keywords.items():
+             f.write(f"[{model}]\n")
+             for word in words:
+                 f.write(f"{word}\n")
+             f.write("\n")
+
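Both loaders expect an INI-like layout: a line in square brackets opens a section and every following non-empty line is one keyword. A minimal sketch of the assumed keywords.txt, using keywords that appear elsewhere in this diff as placeholder entries:

    [Co-Optimism]
    semoga sukses
    tambah maju

    [Co-Support]
    siap dukung
    coblos dua

negative_keywords.txt follows the same layout, except that the bracketed headers are model names such as [Model Mundjidah] and [Model Warsubi V1].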
+ # Function to save the data to the Excel file that matches the model
+ def save_to_data_train(data, model_name):
+     file_paths = {
+         "Model Mundjidah": 'data_komen_mundjidah_clean.xlsx',
+         "Model Warsubi V1": 'data_komen_warsubi_clean-v1.xlsx'
+     }
+     file_path = file_paths.get(model_name)
+     if not file_path:
+         st.error("Model tidak dikenali. Pastikan model sesuai.")
+         return
+
+     # Try to read the old file, or start from an empty frame
+     try:
+         existing_data = pd.read_excel(file_path)
+     except FileNotFoundError:
+         existing_data = pd.DataFrame(columns=data.columns)
+
+     # Merge in the new data and drop duplicates
+     updated_data = pd.concat([existing_data, data], ignore_index=True)
+     updated_data = updated_data.drop_duplicates(subset=['Comment', 'Cleaned_Text'])

+     # Save the data
+     updated_data.to_excel(file_path, index=False)
+     return file_path
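Because the merge deduplicates on the Comment and Cleaned_Text pair, sending the same batch twice leaves the training file unchanged. A hypothetical call, with a made-up one-row frame for illustration:

    batch = pd.DataFrame({'Comment': ['mantap bah'], 'Cleaned_Text': ['mantap bah']})
    path = save_to_data_train(batch, "Model Mundjidah")  # appends the row once; reruns are no-ops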

+ # Parameter definitions
+ PRE_TRAINED_MODEL = 'indobenchmark/indobert-base-p2'
+ EPOCHS = 5
+ BATCH_SIZE = 32
+ LEARNING_RATE = 1e-5
+
+ # Function to retrain the model
+ def retrain_model(kamus_data, model_path):
+     # Prepare the data
+     X = kamus_data['Cleaned_Text']
+     y = kamus_data['Brand Attitude']
+
+     # Convert the Brand Attitude labels to numbers
+     label_map = {'Co-Likes': 0, 'Co-Support': 1, 'Co-Optimism': 2, 'Co-Negative': 3}
+     y = y.map(label_map)
+
+     # Split the data into training and testing
+     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+     # Tokenize with the BERT tokenizer
+     tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL)
+     X_train_tokens = tokenizer(list(X_train), padding=True, truncation=True, max_length=128, return_tensors='tf')
+     X_test_tokens = tokenizer(list(X_test), padding=True, truncation=True, max_length=128, return_tensors='tf')
+
+     # Load the BERT model
+     bert_model = TFBertForSequenceClassification.from_pretrained(PRE_TRAINED_MODEL, num_labels=4)
+
+     # Optimizer and loss function
+     optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
+     loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+     metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
+
+     # Compile the model
+     bert_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
+
+     # Train the model
+     bert_model.fit(
+         X_train_tokens['input_ids'], y_train,
+         epochs=EPOCHS,
+         batch_size=BATCH_SIZE,
+         validation_data=(X_test_tokens['input_ids'], y_test)
+     )
+
+     # Save the model
+     bert_model.save_pretrained(model_path)
+
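Since retraining now ends with save_pretrained rather than Keras .save, model_path becomes a directory in the Hugging Face format, and reloading goes through from_pretrained. A minimal sketch, assuming the weights were saved under the "update_mundjidah-model" path used in the Retrain Model menu below:

    from transformers import TFBertForSequenceClassification

    # Reload the retrained 4-label Brand Attitude classifier from its save_pretrained directory
    reloaded = TFBertForSequenceClassification.from_pretrained("update_mundjidah-model")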
+ tf.config.set_visible_devices([], 'GPU')
  # Add the menu options in the sidebar
+ menu = st.sidebar.selectbox("Pilih Menu", ["Upload Data", "Hasil Prediksi", "Perlu Validasi", "Keyword BA", "Normalisasi Kamus", "Overview Data", "Retrain Model"])

+ if menu == "Upload Data":
      # Streamlit app
      st.title("Aplikasi Klasifikasi Sentimen dan Brand Attitude")

          data = pd.read_excel(uploaded_file)
      elif uploaded_file.name.endswith('.csv'):
          data = pd.read_csv(uploaded_file)
+
+     st.session_state.data = data

      # Clean the data
      data.dropna(how='all', inplace=True)

      known_usernames = get_known_usernames(data)
      data["Cleaned_Text"] = data["Comment"].apply(lambda x: remove_usernames(x, known_usernames))
      data["Cleaned_Text"] = data["Cleaned_Text"].apply(lambda x: normalize_text(clean_text(x), normalization_dict))
+
+     keywords = load_keywords("keywords.txt")
+     negative_keywords = load_negative_keywords("negative_keywords.txt")
+     st.session_state.keywords = keywords
+     st.session_state.negative_keywords = negative_keywords
+
      # Configure the model based on the selected option
      if model_choice == "Model Mundjidah":
          sentiment_model_path = "mundjidah-model.h5"
          ba_model_path = "ba-mundjidah-model.h5"
+         selected_df = df_kamus_komen1
+         selected_negative_keywords = negative_keywords.get("Model Mundjidah", [])
+         positive_keywords = ["semoga menang", "semoga", "baik", "bagus", "terbaik", "semangat", "mundjidah", "amin", "gas", "lanjutkan"]
+
      elif model_choice == "Model Warsubi V1":
          sentiment_model_path = "warsa-model.h5"
          ba_model_path = "ba-warsa-model.h5"
+         selected_df = df_kamus_komen2
+         selected_negative_keywords = negative_keywords.get("Model Warsubi V1", [])
+         positive_keywords = ["hebat", "luar biasa", "bagus", "terbaik", "memilih dengan tepat", "all in abah subi", "pilih warsubi", "dua", "✌️", "abah", "sae", "sehat", "semangat"]
+
      else:  # Fallback for the other model
          sentiment_model_path = "warsubi-v2-model.h5"
          ba_model_path = "ba-warsubi-v2-model.h5"
+         positive_keywords = ["hebat"]
+         negative_keywords = ["golput ae"]

      PRE_TRAINED_MODEL = 'indobenchmark/indobert-base-p2'
+     st.session_state['model_choice'] = model_choice
+
      # Load the sentiment model
      try:
          sentiment_model = TFBertForSequenceClassification.from_pretrained(PRE_TRAINED_MODEL, num_labels=3)

      # Sentiment prediction function with additional keyword matching
      def predict_with_sentiment_model(text):
          if any(keyword.lower() in text.lower() for keyword in positive_keywords):
              return 'positive'
+         elif any(keyword.lower() in text.lower() for keyword in selected_negative_keywords):
              return 'negative'

          # Predict with the model if no keyword matches

      except Exception as e:
          st.error(f"Gagal memuat model Brand Attitude: {e}")
          st.stop()

+ def predict_ba_with_model(text, ba_model, tokenizer, threshold=0.7):
      for label, keywords_list in keywords.items():
          if any(keyword.lower() in text.lower() for keyword in keywords_list):
+             return label, 1.0  # On a keyword match, prob = 1.0

      # If no keyword matches, use the model for prediction
      inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True, max_length=128)
      outputs = ba_model(inputs)
      logits = outputs.logits
+
+     # Compute the probabilities with softmax
+     probabilities = tf.nn.softmax(logits, axis=-1).numpy()[0]
+     max_prob = np.max(probabilities)  # Highest probability
+     predicted_label_index = np.argmax(probabilities)  # Index of the label with the highest probability
+     predicted_label = ['Co-Likes', 'Co-Support', 'Co-Optimism', 'Co-Negative'][predicted_label_index]
+
+     # If the highest probability is below the threshold, set the label to 'Co-Likes' for review
+     if max_prob < threshold:
+         predicted_label = 'Co-Likes'

+     return predicted_label, max_prob
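The threshold makes this a reject-option classifier: keyword hits return with probability 1.0, confident model predictions pass through, and anything under 0.7 is parked as 'Co-Likes' so the 'Perlu Validasi' menu can pick it up through its Probabilitas < 0.7 filter. A standalone sketch of just that routing rule on made-up logits (not the app's code):

    import numpy as np
    import tensorflow as tf

    labels = ['Co-Likes', 'Co-Support', 'Co-Optimism', 'Co-Negative']
    logits = tf.constant([[0.9, 0.7, 0.2, 0.1]])       # hypothetical model output
    probs = tf.nn.softmax(logits, axis=-1).numpy()[0]  # max prob is about 0.36 here
    label = labels[np.argmax(probs)] if probs.max() >= 0.7 else 'Co-Likes'
    print(label, probs.max())                          # routed to 'Co-Likes' for review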

+ # Use the function to add the Brand Attitude prediction to the data
+ # data['Brand_Attitude'] = data['Cleaned_Text'].apply(lambda x: predict_ba_with_model(x, ba_model, tokenizer, threshold=0.7))
+
+ # Add the classification results to the DataFrame
+ data[['Brand_Attitude', 'Probabilitas']] = data['Cleaned_Text'].apply(
+     lambda x: pd.Series(predict_ba_with_model(x, ba_model, tokenizer, threshold=0.7))
+ )
+
  # Set "Co-Negative" when Sentimen_Prediksi is "negative"
  data['Brand_Attitude'] = data.apply(
      lambda row: "Co-Negative" if row['Sentimen_Prediksi'] == 'negative' else row['Brand_Attitude'], axis=1

  data['Brand_Attitude'] = data.apply(
      lambda row: "Co-Likes" if row['Sentimen_Prediksi'] != 'negative' and row['Brand_Attitude'] == 'Co-Negative' else row['Brand_Attitude'], axis=1
  )
+
+ st.session_state.classified_data = data
+
+ # Button to navigate to "Hasil Prediksi"
+ st.success("Data berhasil diprediksi! Lihat di menu Hasil Prediksi.")
+
+ except Exception as e:
+     st.error(f"Terjadi kesalahan: {e}")
+
+ elif menu == "Hasil Prediksi":
+     # Streamlit app
+     if "classified_data" in st.session_state:
+         data = st.session_state.classified_data
+         st.title("Aplikasi Klasifikasi Sentimen dan Brand Attitude")
+
+         # Display the results
+         st.write("Hasil Klasifikasi Sentimen dan Brand Attitude:")
+         st.dataframe(data[['Comment', 'Cleaned_Text', 'Sentimen_Prediksi', 'Brand_Attitude']])
+
+         # Comment-level distribution
+         st.write("Distribusi Level Komentar:")
+         level_counts = data['Brand_Attitude'].value_counts()
+         total_co_likes = level_counts.get('Co-Likes', 0)
+         total_co_support = level_counts.get('Co-Support', 0)
+         total_co_optimism = level_counts.get('Co-Optimism', 0)
+         total_co_negative = level_counts.get('Co-Negative', 0)
+
+         # Display the Brand Attitude totals
+         st.write(f"**Total BA Co-Likes:** {total_co_likes}")
+         st.write(f"**Total BA Co-Support:** {total_co_support}")
+         st.write(f"**Total BA Co-Optimism:** {total_co_optimism}")
+         st.write(f"**Total BA Co-Negative:** {total_co_negative}")
+
+         # Display the count for each category
+         st.bar_chart(level_counts)
+
+         def generate_wordcloud(text):
+             wordcloud = WordCloud(
+                 width=800,
+                 height=400,
+                 background_color='white',
+                 max_words=200,
+                 colormap='viridis'
+             ).generate(text)
+             fig, ax = plt.subplots(figsize=(10, 5))
+             ax.imshow(wordcloud, interpolation='bilinear')
+             ax.axis('off')
+             return fig
+
+         st.write("WordCloud Berdasarkan Brand Attitude:")
+         for ba in ['Co-Likes', 'Co-Support', 'Co-Optimism', 'Co-Negative']:
+             text = " ".join(data[data['Brand_Attitude'] == ba]['Cleaned_Text'].tolist())
+             if text:
+                 st.write(f"WordCloud untuk Brand Attitude {ba.capitalize()}:")
+                 st.pyplot(generate_wordcloud(text))
+
+         # Function to tokenize text
+         def tokenize_text(text):
+             """Cleans the text and splits it into words."""
+             # Remove punctuation, convert to lowercase, and split
+             words = text.lower().replace('.', '').replace(',', '').split()
+             return words
+
+         # Function to count word frequencies
+         def get_word_frequencies(data, column):
+             """Counts word frequencies for a given text column."""
+             all_words = []
+             for text in data[column]:
+                 all_words.extend(tokenize_text(text))

+             if len(all_words) == 0:
+                 return None  # If no words were found, return None
+             return Counter(all_words)
+
+         co_likes_data = data[data['Brand_Attitude'] == 'Co-Likes']
+         co_support_data = data[data['Brand_Attitude'] == 'Co-Support']
+         co_optimism_data = data[data['Brand_Attitude'] == 'Co-Optimism']
+         co_negative_data = data[data['Brand_Attitude'] == 'Co-Negative']
+
+         # Chart for the top words in BA Co-Likes
+         st.write("### Top Kata di BA Co-Likes")
+         co_likes_word_counts = get_word_frequencies(co_likes_data, 'Cleaned_Text')
+         if co_likes_word_counts is None:
+             st.write("Tidak ada kata yang ditemukan di kategori Co-Likes.")
+         else:
+             co_likes_most_common = co_likes_word_counts.most_common(10)
+             co_likes_words, co_likes_counts = zip(*co_likes_most_common)
              plt.figure(figsize=(10, 6))
+             plt.barh(co_likes_words, co_likes_counts, color='green')
              plt.xlabel('Frequency')
              plt.ylabel('Words')
+             plt.title('Top Words in Co-Likes Category')
              plt.gca().invert_yaxis()
              st.pyplot(plt)
+
+         # Chart for the top words in BA Co-Support
+         st.write("### Top Kata di BA Co-Support")
+         co_support_word_counts = get_word_frequencies(co_support_data, 'Cleaned_Text')
+         if co_support_word_counts is None:
+             st.write("Tidak ada kata yang ditemukan di kategori Co-Support.")
+         else:
+             co_support_most_common = co_support_word_counts.most_common(10)
+             co_support_words, co_support_counts = zip(*co_support_most_common)
              plt.figure(figsize=(10, 6))
+             plt.barh(co_support_words, co_support_counts, color='orange')
              plt.xlabel('Frequency')
              plt.ylabel('Words')
+             plt.title('Top Words in Co-Support Category')
              plt.gca().invert_yaxis()
              st.pyplot(plt)

+         # Chart for the top words in BA Co-Optimism
+         st.write("### Top Kata di BA Co-Optimism")
+         co_optimism_word_counts = get_word_frequencies(co_optimism_data, 'Cleaned_Text')
+         if co_optimism_word_counts is None:
+             st.write("Tidak ada kata yang ditemukan di kategori Co-Optimism.")
+         else:
+             co_optimism_most_common = co_optimism_word_counts.most_common(10)
+             co_optimism_words, co_optimism_counts = zip(*co_optimism_most_common)
+             plt.figure(figsize=(10, 6))
+             plt.barh(co_optimism_words, co_optimism_counts, color='blue')
+             plt.xlabel('Frequency')
+             plt.ylabel('Words')
+             plt.title('Top Words in Co-Optimism Category')
+             plt.gca().invert_yaxis()
+             st.pyplot(plt)

+         # Chart for the top words in BA Co-Negative
+         st.write("### Top Kata di BA Co-Negative")
+         co_negative_word_counts = get_word_frequencies(co_negative_data, 'Cleaned_Text')
+         if co_negative_word_counts is None:
+             st.write("Tidak ada kata yang ditemukan di kategori Co-Negative.")
+         else:
+             co_negative_most_common = co_negative_word_counts.most_common(10)
+             co_negative_words, co_negative_counts = zip(*co_negative_most_common)
+             plt.figure(figsize=(10, 6))
+             plt.barh(co_negative_words, co_negative_counts, color='red')
+             plt.xlabel('Frequency')
+             plt.ylabel('Words')
+             plt.title('Top Words in Co-Negative Category')
+             plt.gca().invert_yaxis()
+             st.pyplot(plt)
+
+         # Prepare the data to be updated
+         new_data = data[['Comment', 'Cleaned_Text', 'Sentimen_Prediksi', 'Brand_Attitude']].copy()

+     else:
+         st.warning("Tidak ada hasil prediksi. Silakan upload data terlebih dahulu di menu 'Upload Data'.")


+ # "Perlu Validasi" menu
+ elif menu == "Perlu Validasi":
+     st.title("Komentar Perlu Validasi")
+
+     # Check whether the classified data is available
+     if 'classified_data' not in st.session_state:
+         st.error("Silakan klasifikasikan data terlebih dahulu di menu sebelumnya.")
+     else:
+         # Take the comments whose probability is low
+         data = st.session_state.classified_data
+
+         if 'Status' not in data.columns:
+             data['Status'] = False  # Default value False
+
+         review_data = data[(data['Brand_Attitude'] == 'Co-Likes') & (data['Probabilitas'] < 0.7)]
+
+         if review_data.empty:
+             st.write("Tidak ada komentar yang memerlukan validasi saat ini.")
+         else:
+             # Clustering step
+             st.write("### Clustering Komentar")
+             vectorizer = TfidfVectorizer(max_features=500, stop_words='english')
+             X = vectorizer.fit_transform(review_data['Cleaned_Text'])
+
+             # Slider to choose the number of clusters
+             k = st.slider("Pilih jumlah cluster:", min_value=2, max_value=10, value=3)
+             kmeans = KMeans(n_clusters=k, random_state=42)
+             review_data['Cluster'] = kmeans.fit_predict(X)
+
+             # Dropdown to choose a cluster
+             cluster_ids = sorted(review_data['Cluster'].unique())
+             selected_cluster = st.selectbox("Pilih Cluster untuk Ditampilkan:", cluster_ids)
+
+             # Show the comments of the selected cluster
+             st.write(f"### Komentar di Cluster {selected_cluster}")
+             cluster_data = review_data[review_data['Cluster'] == selected_cluster]
+             st.dataframe(cluster_data[['Cleaned_Text', 'Brand_Attitude', 'Probabilitas']])
+
+             # Form for validating the Brand Attitude
+             st.write("### Validasi Brand Attitude")
+             with st.form(key=f"form_cluster_{selected_cluster}"):
+                 update_all = st.checkbox("Ubah seluruh komentar dalam cluster ini")
+                 if update_all:
+                     # Change every comment in the cluster
+                     new_brand_attitude = st.selectbox("Pilih Brand Attitude Baru:",
+                                                       ["Co-Likes", "Co-Support", "Co-Optimism", "Co-Negative"],
+                                                       key=f"all_{selected_cluster}")
+                 else:
+                     # Change a specific comment in the cluster
+                     cleaned_text_to_update = st.selectbox("Pilih komentar untuk diubah:", cluster_data['Cleaned_Text'])
+                     new_brand_attitude = st.selectbox("Pilih Brand Attitude Baru:",
+                                                       ["Co-Likes", "Co-Support", "Co-Optimism", "Co-Negative"],
+                                                       key=f"one_{selected_cluster}")
+
+                 submit_button = st.form_submit_button("Update Brand Attitude")
+
+                 if submit_button:
+                     if update_all:
+                         # Update every comment in the cluster
+                         review_data.loc[review_data['Cluster'] == selected_cluster, 'Brand_Attitude'] = new_brand_attitude
+                         review_data.loc[review_data['Cluster'] == selected_cluster, 'Status'] = True
+                         st.success(f"Brand Attitude untuk seluruh komentar di Cluster {selected_cluster} berhasil diperbarui menjadi: {new_brand_attitude}")
+                     else:
+                         # Update a specific comment
+                         review_data.loc[review_data['Cleaned_Text'] == cleaned_text_to_update, 'Brand_Attitude'] = new_brand_attitude
+                         review_data.loc[review_data['Cleaned_Text'] == cleaned_text_to_update, 'Status'] = True
+                         st.success(f"Brand Attitude berhasil diperbarui untuk komentar: {cleaned_text_to_update}")
+
+             # Update the original prediction results in session_state
+             st.session_state.classified_data.loc[review_data.index, :] = review_data
+
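Stripped of the Streamlit widgets, the review queue's grouping step is TF-IDF vectors fed to k-means, so similar low-confidence comments land in the same cluster and can be relabeled in bulk. A minimal standalone sketch with made-up comments (not the app's data); the app's stop_words='english' removes little from Indonesian text, so the sketch omits it:

    from sklearn.cluster import KMeans
    from sklearn.feature_extraction.text import TfidfVectorizer

    comments = ["jalan rusak terus", "dalan rusak ae", "semoga tambah maju", "tambah maju jombang"]
    X = TfidfVectorizer(max_features=500).fit_transform(comments)
    labels = KMeans(n_clusters=2, random_state=42, n_init=10).fit_predict(X)
    print(labels)  # similar complaints and similar hopes tend to share a cluster id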
+ # "Keyword BA" menu
+ elif menu == "Keyword BA":
+     st.subheader("Keyword BA Menu")
+
+     # Load the keywords from file
+     keywords = load_keywords("keywords.txt")
+     negative_keywords = load_negative_keywords("negative_keywords.txt")
+
+     # Take the model in use from the session state
+     current_model = st.session_state.get("model_choice", "Model Mundjidah")
+
+     # Update the Co-Negative keywords based on the model
+     if current_model in negative_keywords:
+         keywords['Co-Negative'] = negative_keywords[current_model]
+     else:
+         keywords['Co-Negative'] = []
+
+     # Choose a Brand Attitude and display its comments
+     st.write("### Pilih Brand Attitude untuk melihat komentarnya")
+     ba_option = st.selectbox("Pilih Brand Attitude", list(keywords.keys()), index=0)
+
+     # Show the keywords for the BA
+     st.write(f"### Keyword untuk {ba_option}")
+     st.write(", ".join(keywords[ba_option]))
+
+     # Show the comments that match the BA
+     data = st.session_state.classified_data
+     filtered_data = data[data['Brand_Attitude'] == ba_option]
+     filtered_data = filtered_data.sort_values(by='Cleaned_Text', ascending=True)  # Sort ascending
+     if filtered_data.empty:
+         st.write("Tidak ada komentar yang ditemukan untuk Brand Attitude ini.")
+     else:
+         st.write(filtered_data[['Cleaned_Text', 'Brand_Attitude']])
+
+     if 'Status' not in data.columns:
+         data['Status'] = False  # Default value False
+
+     # CRUD operations
+     st.write("### Kelola Keyword")
+     with st.form("manage_keywords_form"):
+         # Choose a keyword to update or delete
+         selected_keyword = st.selectbox("Pilih Keyword untuk Diubah atau Dihapus", keywords[ba_option])
+         new_keyword_value = st.text_input("Ubah Keyword (Kosongkan jika ingin menghapus)", value=selected_keyword)
+         action = st.radio("Pilih Aksi", ["Update", "Delete"], index=0)
+         manage_submit_button = st.form_submit_button("Lakukan Perubahan")
+
+         if manage_submit_button:
+             if action == "Update" and new_keyword_value.strip():
+                 # Update the keyword
+                 index = keywords[ba_option].index(selected_keyword)
+                 keywords[ba_option][index] = new_keyword_value.strip()
+                 save_keywords("keywords.txt", keywords)  # Save the change
+                 st.success(f"Keyword '{selected_keyword}' berhasil diubah menjadi '{new_keyword_value.strip()}'.")
+             elif action == "Delete":
+                 # Delete the keyword
+                 keywords[ba_option].remove(selected_keyword)
+                 save_keywords("keywords.txt", keywords)  # Save the change
+                 st.success(f"Keyword '{selected_keyword}' berhasil dihapus.")
+             else:
+                 st.warning("Masukkan keyword baru untuk update atau pilih aksi delete.")
+
+     # Show all Brand Attitudes with filtering and search
+     st.write("### Tabel Semua Data dengan Filter dan Pencarian")
+
+     # Check whether classified_data is available
+     if "classified_data" in st.session_state:
+         data = st.session_state.classified_data
+
+         # Text input for the filter
+         search_text = st.text_input("Cari berdasarkan teks komentar atau Brand Attitude:")
+
+         # Filter the data based on the input text
+         if search_text:
+             filtered_data = data[
+                 data['Cleaned_Text'].str.contains(search_text, case=False, na=False) |
+                 data['Brand_Attitude'].str.contains(search_text, case=False, na=False)
+             ]
+         else:
+             filtered_data = data
+
+         edited_data = st.data_editor(
+             filtered_data[['Cleaned_Text', 'Brand_Attitude']].copy(),
+             use_container_width=True,
+             key="ba_editor"
+         )
+
+         # Button to save the changes
+         if st.button("Simpan Perubahan"):
+             # Update the Brand Attitude and Status columns in the original data based on the table edits
+             for index, row in edited_data.iterrows():
+                 original_row = filtered_data.loc[index]
+                 if row['Brand_Attitude'] != original_row['Brand_Attitude']:
+                     data.loc[index, 'Brand_Attitude'] = row['Brand_Attitude']
+                     data.loc[index, 'Status'] = True  # Mark as updated
+
+             # Save back to session_state
+             st.session_state.classified_data = data
+             st.success("Perubahan berhasil disimpan!")
+     else:
+         st.warning("Tidak ada data yang tersedia. Silakan upload data terlebih dahulu.")
+
+     # Add a new keyword
+     st.write("### Tambahkan Keyword Baru")
+     with st.form("add_keyword_form"):
+         new_ba = st.selectbox("Pilih Brand Attitude untuk Keyword Baru", list(keywords.keys()))
+         new_keyword = st.text_input("Masukkan Keyword Baru")
+         add_submit_button = st.form_submit_button("Tambah Keyword")
+
+         if add_submit_button and new_keyword.strip():
+             if new_ba == "Co-Negative":
+                 # Add the keyword to negative_keywords.txt
+                 negative_keywords[current_model].append(new_keyword.strip())
+                 save_negative_keywords("negative_keywords.txt", negative_keywords)
+                 st.success(f"Keyword Co-Negative '{new_keyword.strip()}' berhasil ditambahkan untuk model '{current_model}'!")
+             else:
+                 # Add the keyword to keywords.txt
+                 keywords[new_ba].append(new_keyword.strip())
+                 save_keywords("keywords.txt", keywords)
+                 st.success(f"Keyword '{new_keyword.strip()}' berhasil ditambahkan ke {new_ba}!")
+
+     # Save to session_state
+     st.session_state.classified_data = data
+     st.session_state.keywords = keywords
+     st.session_state.negative_keywords = negative_keywords

+
+
730
+ elif menu == "Normalisasi Kamus":
731
+ st.subheader("Normalisasi Kamus")
732
 
733
+ # Mengambil data dari session_state jika tersedia
734
+ if 'classified_data' not in st.session_state:
735
+ st.error("Silakan unggah file dan lakukan klasifikasi di menu 'Klasifikasi Sentimen' terlebih dahulu.")
736
+ else:
737
+ # Mengambil data yang telah diproses dan diklasifikasikan
738
+ data = st.session_state.classified_data
739
+
740
+ # Pastikan kolom 'Status' ada di DataFrame
741
+ if 'Status' not in data.columns:
742
+ data['Status'] = False # Tambahkan kolom 'Status' jika belum ada
743
+
744
+ # Tokenisasi dan hitung frekuensi kata
745
+ def tokenize(text):
746
+ return re.findall(r'\b\w+\b', text.lower()) # Tokenisasi kata-kata, huruf kecil semua
747
+
748
+ # Fungsi untuk menormalkan kata-kata di dalam data
749
+ def normalize_data(data, slang_dict):
750
+ # Proses normalisasi kata
751
+ def normalize_text(text):
752
+ words = text.split()
753
+ normalized_words = []
754
+ updated = False
755
+ for word in words:
756
+ if word in slang_dict:
757
+ normalized_words.append(slang_dict[word])
758
+ updated = True
759
+ else:
760
+ normalized_words.append(word)
761
+ # Tandai status sebagai TRUE jika terjadi perubahan
762
+ if updated:
763
+ data.loc[data['Cleaned_Text'] == text, 'Status'] = True
764
+ return ' '.join(normalized_words)
765
+
766
+ data['Cleaned_Text'] = data['Cleaned_Text'].apply(normalize_text)
767
+ return data
768
+
769
+ # Gabungkan semua komentar untuk tokenisasi
770
+ all_comments = ' '.join(data['Cleaned_Text'])
771
+ words = tokenize(all_comments)
772
+
773
+ # Hitung frekuensi kata
774
+ word_counts = Counter(words)
775
+
776
+ # Filter kata yang frekuensinya lebih dari 10
777
+ filtered_word_counts = {word: count for word, count in word_counts.items()}
778
+
779
+ # Urutkan berdasarkan frekuensi
780
+ sorted_words = sorted(filtered_word_counts.items(), key=lambda x: x[1], reverse=True)
781
+
782
+ # Tampilkan tabel kata dan frekuensinya
783
+ st.write("Berikut adalah daftar kata-kata hasil tokenisasi:")
784
+ word_df = pd.DataFrame(sorted_words, columns=["Kata", "Frekuensi"])
785
+ st.dataframe(word_df)
786
+
787
+ # Membaca kamus normalisasi dari file
788
+ slang_dict = load_slang_dict('slang.txt')
789
+
790
+ if not slang_dict:
791
+ st.write("Belum ada kamus normalisasi yang ditemukan.")
792
+ else:
793
+ # Menampilkan kamus normalisasi yang sudah ada
794
+ st.write("### Kamus Normalisasi yang Sudah Ada")
795
+ norm_dict_df = pd.DataFrame(list(slang_dict.items()), columns=["Kata Asli", "Kata Normalisasi"])
796
+ st.dataframe(norm_dict_df)
797
+
798
+ # Tambahkan fitur untuk meng-update kata normalisasi
799
+ st.write("### Tambahkan Normalisasi Kata")
800
+ with st.form("add_normalization_form"):
801
+ new_word = st.text_input("Masukkan kata yang belum normal", "")
802
+ normalized_word = st.text_input("Masukkan kata normalisasi", "")
803
+ submit_button = st.form_submit_button("Tambah Normalisasi")
804
+
805
+ if submit_button:
806
+ if new_word and normalized_word:
807
+ # Menambahkan normalisasi kata baru ke kamus
808
+ slang_dict[new_word] = normalized_word
809
+ save_slang_dict(slang_dict, 'slang.txt') # Simpan pembaruan ke file
810
+ st.success(f"Normalisasi kata '{new_word}' -> '{normalized_word}' berhasil ditambahkan!")
811
+ else:
812
+ st.warning("Harap masukkan kata yang belum normal dan kata normalisasi!")
813
+
814
+ # Setelah menambahkan normalisasi, kita akan menormalkan data
815
+ if slang_dict:
816
+ data = normalize_data(data, slang_dict)
817
+
818
+ # Menampilkan hasil normalisasi
819
+ st.write("Hasil Normalisasi pada Data:")
820
+ st.dataframe(data[['Comment', 'Cleaned_Text', 'Status']])
821
+
822
+ # Menyimpan data yang telah dinormalisasi ke session state
823
+ st.session_state.classified_data = data
824
+
825
+
826
+ # Menu Overview Data
827
+ elif menu == "Overview Data":
828
+ st.title("Overview Data")
829
+
830
+ # Periksa apakah data sudah tersedia
831
+ if 'classified_data' not in st.session_state:
832
+ st.error("Silakan unggah dan klasifikasikan data di menu sebelumnya.")
833
+ else:
834
+ data = st.session_state.classified_data
835
+
836
+ # Pastikan kolom 'Status' ada
837
+ if 'Status' not in data.columns:
838
+ data['Status'] = False # Tambahkan kolom 'Status' jika belum ada
839
+
840
+ # Tampilkan data akhir
841
+ st.write("### Data Akhir:")
842
+ final_data = data[['Cleaned_Text', 'Brand_Attitude', 'Status']].copy()
843
+ st.dataframe(final_data)
844
+
845
+ # Summary Perolehan Brand Attitude
846
+ st.write("### Summary Perolehan Brand Attitude:")
847
+ ba_summary = data['Brand_Attitude'].value_counts().reset_index()
848
+ ba_summary.columns = ['Brand_Attitude', 'Jumlah']
849
+ st.table(ba_summary)
850
+
851
+ # Hitung jumlah data yang tervalidasi ulang (status == True)
852
+ total_validated = data[data['Status'] == True].shape[0]
853
+ st.write(f"### Total Data yang Tervalidasi Ulang: {total_validated}")
854
+
855
+ # Tambahkan kolom hitungan Brand Attitude
856
+ data['Co-Likes'] = data['Brand_Attitude'].apply(lambda x: 1 if x == 'Co-Likes' else 0)
857
+ data['Co-Support'] = data['Brand_Attitude'].apply(lambda x: 1 if x == 'Co-Support' else 0)
858
+ data['Co-Optimism'] = data['Brand_Attitude'].apply(lambda x: 1 if x == 'Co-Optimism' else 0)
859
+ data['Co-Negative'] = data['Brand_Attitude'].apply(lambda x: 1 if x == 'Co-Negative' else 0)
860
+
861
+ # Hitung sebaran Brand Attitude per Parent Link
862
+ ba_per_parent_link_updated = data.groupby('Parent Link').agg({
863
+ 'Nama Akun': 'first', # Ambil hanya 1 Nama Akun pertama
864
+ 'Co-Likes': 'sum',
865
+ 'Co-Support': 'sum',
866
+ 'Co-Optimism': 'sum',
867
+ 'Co-Negative': 'sum'
868
+ }).reset_index()
869
+
870
+ # Reorganisasi kolom
871
+ ba_per_parent_link_updated = ba_per_parent_link_updated[['Nama Akun', 'Parent Link', 'Co-Likes', 'Co-Support', 'Co-Optimism', 'Co-Negative']]
872
+ st.write("### Hasil Perolehan Brand Attitude per Postingan:")
873
+ st.dataframe(ba_per_parent_link_updated)
874
+
875
+ # Tombol untuk update ke database postingan
876
+ st.write("### Update Perolehan ke Database Postingan")
877
+ if st.button("Update ke 'Data Jombang.xlsx'"):
878
+ try:
879
+ # Cek apakah file "Data Jombang.xlsx" sudah ada
880
+ try:
881
+ existing_data = pd.read_excel('Data Jombang.xlsx')
882
+ except FileNotFoundError:
883
+ existing_data = pd.DataFrame(columns=ba_per_parent_link_updated.columns)
884
+
885
+ # Gabungkan data baru ke existing_data berdasarkan 'Parent Link'
886
+ updated_data = pd.concat([existing_data, ba_per_parent_link_updated]).drop_duplicates(subset='Parent Link', keep='last')
887
+
888
+ # Simpan hasil pembaruan ke file Excel
889
+ updated_data.to_excel('Data Jombang.xlsx', index=False)
890
+ st.success("Data berhasil diperbarui ke 'Data Jombang.xlsx'!")
891
+ except Exception as e:
892
+ st.error(f"Terjadi kesalahan saat memperbarui data: {e}")
893
 
894
+ # Tombol Kirim Data ke Database
895
+ st.write("### Kirim Data ke Database")
896
+ if st.button("Kirim Data ke Database"):
897
+ try:
898
+ # Tambahkan kolom Created At
899
+ data['Created At'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
900
+
901
+ # Gabungkan dengan data lama jika ada
902
+ try:
903
+ db_data = pd.read_excel('database_komen.xlsx')
904
+ db_data = pd.concat([db_data, data], ignore_index=True)
905
+ db_data = db_data.drop_duplicates() # Hapus duplikat
906
+ except FileNotFoundError:
907
+ db_data = data
908
+
909
+ # Simpan hasil ke file Excel
910
+ db_data.to_excel('database_komen.xlsx', index=False)
911
+ st.success("Data berhasil dikirim ke database!")
912
+ except Exception as e:
913
+ st.error(f"Terjadi kesalahan saat menyimpan ke database: {e}")
914
+
915
+ # Tombol Kirim Data ke Retraining
916
+ st.write("### Kirim Data ke Retraining")
917
+ if 'model_choice' in st.session_state:
918
+ model_name = st.session_state['model_choice']
919
+ st.write(f"Model yang digunakan: **{model_name}**")
920
+
921
+ if st.button("Kirim Data ke Data Train"):
922
+ try:
923
+ # Siapkan data yang akan dikirim ke data train
924
+ data_to_train = data.copy()
925
+ data_to_train['Sentimen_Aktual'] = data_to_train['Sentimen_Prediksi']
926
+ data_to_train['Brand Attitude'] = data_to_train['Brand_Attitude']
927
+ data_to_train['Date'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
928
+
929
+ # Reorganisasi kolom
930
+ data_to_train = data_to_train[['Comment', 'Sentimen_Aktual', 'Cleaned_Text',
931
+ 'Kandidat', 'Parent Link', 'Date', 'Brand Attitude']]
932
+
933
+ # Simpan data ke file train sesuai model
934
+ file_path = save_to_data_train(data_to_train, model_name)
935
+ st.success(f"Data berhasil dikirim ke retraining: **{file_path}**")
936
+ except Exception as e:
937
+ st.error(f"Terjadi kesalahan: {e}")
938
+ else:
939
+ st.error("Model belum dipilih. Silakan klasifikasikan data terlebih dahulu.")
940
+
941
+ # Menu Retrain Model
942
+ elif menu == "Retrain Model":
943
+ st.title("Retrain Model")
944
  kamus_option = st.selectbox(
945
  "Pilih Kamus yang Ingin Diedit:",
946
  ["data_komen_mundjidah_clean.xlsx", "data_komen_warsubi_clean-v1.xlsx"]
947
  )
948
 
949
+ # Tentukan path model sesuai kamus
950
+ model_paths = {
951
+ "data_komen_mundjidah_clean.xlsx": "update_mundjidah-model",
952
+ "data_komen_warsubi_clean-v1.xlsx": "update_warsubi-model"
953
+ }
954
+ model_path = model_paths[kamus_option]
 
 
 
 
 
 
 
 
 
 
 
 
955
 
956
+ # Muat data kamus dari Excel
957
+ try:
958
+ kamus_data = pd.read_excel(kamus_option)
959
+
960
+ st.write("### Tabel Kamus Saat Ini")
961
+ edited_data = st.data_editor(
962
+ kamus_data,
963
+ use_container_width=True,
964
+ height=500
965
+ )
966
+
967
+ # Simpan perubahan ke Excel
968
+ if st.button("Simpan Perubahan"):
969
+ edited_data.to_excel(kamus_option, index=False)
970
+ st.success(f"Perubahan berhasil disimpan ke {kamus_option}!")
971
+
972
+ # Tombol untuk retrain model
973
+ if st.button("Retrain Model"):
974
+ with st.spinner("Melatih ulang model..."):
975
+ retrain_model(edited_data, model_path)
976
+ st.success(f"Model berhasil dilatih ulang dan disimpan di path: {model_path}!")
977
 
978
+ except Exception as e:
979
+ st.error(f"Terjadi kesalahan saat memuat atau menyimpan kamus: {e}")