naufalnashif
committed on
Commit
•
6f065f8
1
Parent(s):
8545923
Update app.py
Browse files
app.py
CHANGED
@@ -58,35 +58,34 @@ def clean_text(text):
|
|
58 |
|
59 |
return text
|
60 |
|
61 |
-
# Membaca kamus kata gaul Salsabila
|
62 |
-
kamus_path = '_json_colloquial-indonesian-lexicon (1).txt' # Ganti dengan path yang benar
|
63 |
-
with open(kamus_path) as f:
|
64 |
-
data = f.read()
|
65 |
-
lookp_dict = json.loads(data)
|
66 |
-
|
67 |
-
# Dict kata gaul saya sendiri yang tidak masuk di dict Salsabila
|
68 |
-
kamus_sendiri_path = 'kamus_gaul_custom.txt'
|
69 |
-
with open(kamus_sendiri_path) as f:
|
70 |
-
kamus_sendiri = f.read()
|
71 |
-
kamus_gaul_baru = json.loads(kamus_sendiri)
|
72 |
-
|
73 |
-
# Menambahkan dict kata gaul baru ke kamus yang sudah ada
|
74 |
-
lookp_dict.update(kamus_gaul_baru)
|
75 |
-
|
76 |
# Fungsi untuk normalisasi kata gaul
|
77 |
@st.cache_data
|
78 |
def normalize_slang(text, slang_dict):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
words = text.split()
|
80 |
normalized_words = [slang_dict.get(word, word) for word in words]
|
81 |
return ' '.join(normalized_words)
|
82 |
|
83 |
#---------------------------------------------------NLTK Remove Stopwords----------------------------------------------------------------------
|
84 |
|
85 |
-
|
86 |
-
nltk.download("stopwords")
|
87 |
-
stop_words = set(stopwords.words("indonesian"))
|
88 |
-
|
89 |
def remove_stopwords(text, stop_words):
|
|
|
|
|
90 |
# Pecah teks menjadi kata-kata
|
91 |
words = text.split()
|
92 |
|
@@ -95,15 +94,14 @@ def remove_stopwords(text, stop_words):
|
|
95 |
|
96 |
return " ".join(words)
|
97 |
#---------------------------------------------------TFIDF----------------------------------------------------------------------
|
98 |
-
# Fungsi untuk ekstraksi fitur TF-IDF
|
99 |
-
def extract_tfidf_features(texts, tfidf_vectorizer):
|
100 |
-
tfidf_matrix = tfidf_vectorizer.transform(texts)
|
101 |
-
return tfidf_matrix
|
102 |
-
|
103 |
# Memuat model TF-IDF dengan joblib (pastikan path-nya benar)
|
104 |
tfidf_model_path = 'X_tfidf_model.joblib'
|
105 |
tfidf_vectorizer = joblib.load(tfidf_model_path)
|
106 |
-
|
|
|
|
|
|
|
|
|
107 |
#---------------------------------------------------Milih Model----------------------------------------------------------------------
|
108 |
|
109 |
# Fungsi untuk memilih model berdasarkan pilihan pengguna
|
@@ -125,8 +123,8 @@ def select_sentiment_model(selected_model):
|
|
125 |
|
126 |
|
127 |
# Fungsi untuk prediksi sentimen
|
128 |
-
|
129 |
-
def predict_sentiment(text,
|
130 |
# Tahap-1: Membersihkan dan normalisasi teks
|
131 |
cleaned_text = clean_text(text)
|
132 |
norm_slang_text = normalize_slang(cleaned_text, slang_dict)
|
@@ -142,7 +140,7 @@ def predict_sentiment(text, model, tfidf_vectorizer, slang_dict):
|
|
142 |
sentiment_label = labels[int(sentiment)]
|
143 |
|
144 |
return sentiment_label
|
145 |
-
|
146 |
def get_emoticon(sentiment):
|
147 |
if sentiment == "Positif":
|
148 |
emoticon = "π" # Emotikon untuk sentimen positif
|
@@ -153,6 +151,7 @@ def get_emoticon(sentiment):
|
|
153 |
|
154 |
return emoticon
|
155 |
|
|
|
156 |
def buat_chart(df, target_year):
|
157 |
target_year = int(target_year)
|
158 |
st.write(f"Bar Chart Tahun {target_year}:")
|
@@ -216,6 +215,7 @@ def buat_chart(df, target_year):
|
|
216 |
)
|
217 |
|
218 |
# Fungsi untuk membuat tautan unduhan
|
|
|
219 |
def get_table_download_link(df, download_format):
|
220 |
if download_format == "XLSX":
|
221 |
df.to_excel("hasil_sentimen.xlsx", index=False)
|
|
|
58 |
|
59 |
return text
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
# Fungsi untuk normalisasi kata gaul
|
62 |
@st.cache_data
|
63 |
def normalize_slang(text, slang_dict):
|
64 |
+
# Membaca kamus kata gaul Salsabila
|
65 |
+
kamus_path = '_json_colloquial-indonesian-lexicon (1).txt' # Ganti dengan path yang benar
|
66 |
+
with open(kamus_path) as f:
|
67 |
+
data = f.read()
|
68 |
+
lookp_dict = json.loads(data)
|
69 |
+
|
70 |
+
# Dict kata gaul saya sendiri yang tidak masuk di dict Salsabila
|
71 |
+
kamus_sendiri_path = 'kamus_gaul_custom.txt'
|
72 |
+
with open(kamus_sendiri_path) as f:
|
73 |
+
kamus_sendiri = f.read()
|
74 |
+
kamus_gaul_baru = json.loads(kamus_sendiri)
|
75 |
+
|
76 |
+
# Menambahkan dict kata gaul baru ke kamus yang sudah ada
|
77 |
+
lookp_dict.update(kamus_gaul_baru)
|
78 |
+
|
79 |
words = text.split()
|
80 |
normalized_words = [slang_dict.get(word, word) for word in words]
|
81 |
return ' '.join(normalized_words)
|
82 |
|
83 |
#---------------------------------------------------NLTK Remove Stopwords----------------------------------------------------------------------
|
84 |
|
85 |
+
@st.cache_data
|
|
|
|
|
|
|
86 |
def remove_stopwords(text, stop_words):
|
87 |
+
nltk.download("stopwords")
|
88 |
+
stop_words = set(stopwords.words("indonesian"))
|
89 |
# Pecah teks menjadi kata-kata
|
90 |
words = text.split()
|
91 |
|
|
|
94 |
|
95 |
return " ".join(words)
|
96 |
#---------------------------------------------------TFIDF----------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
97 |
# Memuat model TF-IDF dengan joblib (pastikan path-nya benar)
|
98 |
tfidf_model_path = 'X_tfidf_model.joblib'
|
99 |
tfidf_vectorizer = joblib.load(tfidf_model_path)
|
100 |
+
# Fungsi untuk ekstraksi fitur TF-IDF
|
101 |
+
#@st.cache_data
|
102 |
+
#def extract_tfidf_features(texts, _tfidf_vectorizer):
|
103 |
+
# tfidf_matrix = tfidf_vectorizer.transform(texts)
|
104 |
+
# return tfidf_matrix
|
105 |
#---------------------------------------------------Milih Model----------------------------------------------------------------------
|
106 |
|
107 |
# Fungsi untuk memilih model berdasarkan pilihan pengguna
|
|
|
123 |
|
124 |
|
125 |
# Fungsi untuk prediksi sentimen
|
126 |
+
@st.cache_data
|
127 |
+
def predict_sentiment(text, _model, _tfidf_vectorizer, slang_dict):
|
128 |
# Tahap-1: Membersihkan dan normalisasi teks
|
129 |
cleaned_text = clean_text(text)
|
130 |
norm_slang_text = normalize_slang(cleaned_text, slang_dict)
|
|
|
140 |
sentiment_label = labels[int(sentiment)]
|
141 |
|
142 |
return sentiment_label
|
143 |
+
@st.cache_data
|
144 |
def get_emoticon(sentiment):
|
145 |
if sentiment == "Positif":
|
146 |
emoticon = "π" # Emotikon untuk sentimen positif
|
|
|
151 |
|
152 |
return emoticon
|
153 |
|
154 |
+
@st.cache_data
|
155 |
def buat_chart(df, target_year):
|
156 |
target_year = int(target_year)
|
157 |
st.write(f"Bar Chart Tahun {target_year}:")
|
|
|
215 |
)
|
216 |
|
217 |
# Fungsi untuk membuat tautan unduhan
|
218 |
+
@st.cache_data
|
219 |
def get_table_download_link(df, download_format):
|
220 |
if download_format == "XLSX":
|
221 |
df.to_excel("hasil_sentimen.xlsx", index=False)
|