Upload 10 files
- .gitattributes +1 -0
- app.py +21 -0
- deteksi_content.py +149 -0
- deteksi_upload.py +170 -0
- home.py +285 -0
- inbound-source-431806-g7-e49e388ce0be.json +13 -0
- load_model.py +19 -0
- mafindo_mix_llm.csv +3 -0
- requirements.txt +11 -0
- styles.py +43 -0
- test.py +63 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+mafindo_mix_llm.csv filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,21 @@
+import streamlit as st
+
+# Set page configuration
+st.set_page_config(page_title="Hoax Detection Dashboard", layout="wide")
+st.title("Dashboard Deteksi Berita Hoax")
+
+from home import show_home
+from deteksi_content import show_deteksi_konten
+from deteksi_upload import show_deteksi_upload
+
+# Create tabs
+tab1, tab2, tab3 = st.tabs(["Home", "Deteksi Konten", "Deteksi File"])
+
+with tab1:
+    show_home()
+
+with tab2:
+    show_deteksi_konten()
+
+with tab3:
+    show_deteksi_upload()
deteksi_content.py
ADDED
@@ -0,0 +1,149 @@
+import streamlit as st
+from datetime import datetime
+import pandas as pd
+from lime.lime_text import LimeTextExplainer
+from test import predict_hoax, predict_proba_for_lime
+import streamlit.components.v1 as components
+from load_model import load_model
+from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
+from styles import COMMON_CSS
+from google.cloud import storage
+import os
+from io import StringIO
+
+# Set environment variable for Google Cloud credentials
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:/DashboardHoax/inbound-source-431806-g7-e49e388ce0be.json"
+
+def save_corrections_to_gcs(bucket_name, file_name, correction_data):
+    client = storage.Client()  # Uses the credentials set above; the bucket_name/file_name arguments are ignored in favor of the hardcoded names below
+    bucket = client.bucket("dashboardhoax-bucket")
+    blob = bucket.blob("koreksi_pengguna_content.csv")
+
+    # Check if the blob (file) exists
+    if blob.exists():
+        # Download existing CSV from GCS
+        existing_data = blob.download_as_string().decode('utf-8')
+        existing_df = pd.read_csv(StringIO(existing_data))
+    else:
+        # Create a new DataFrame if the file does not exist
+        existing_df = pd.DataFrame(columns=['Timestamp', 'Title', 'Content', 'Prediction', 'Correction'])
+
+    # Append the new data to the existing data
+    new_data_df = pd.DataFrame(correction_data)
+    updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
+
+    # Convert the DataFrame back to CSV and upload
+    updated_csv_data = updated_df.to_csv(index=False)
+    blob.upload_from_string(updated_csv_data, content_type='text/csv')
+
+def show_deteksi_konten():  # renamed from show_deteksi_kontengcs to match the import in app.py
+    st.markdown(COMMON_CSS, unsafe_allow_html=True)
+
+    if 'correction' not in st.session_state:
+        st.session_state.correction = None
+    if 'detection_result' not in st.session_state:
+        st.session_state.detection_result = None
+    if 'lime_explanation' not in st.session_state:
+        st.session_state.lime_explanation = None
+    if 'headline' not in st.session_state:
+        st.session_state.headline = ""
+    if 'content' not in st.session_state:
+        st.session_state.content = ""
+    if 'is_correct' not in st.session_state:
+        st.session_state.is_correct = None
+
+    # Dropdown for selecting a model
+    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
+    selected_model = st.selectbox(
+        "",
+        [
+            "cahya/bert-base-indonesian-522M",
+            "indobenchmark/indobert-base-p2",
+            "indolem/indobert-base-uncased",
+            "mdhugol/indonesia-bert-sentiment-classification"
+        ],
+        key="model_selector_content"
+    )
+
+    # Load the selected model
+    tokenizer, model = load_model(selected_model)
+
+    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Masukkan Judul Berita :</h6>", unsafe_allow_html=True)
+    st.session_state.headline = st.text_input("", value=st.session_state.headline)
+
+    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Masukkan Konten Berita :</h6>", unsafe_allow_html=True)
+    st.session_state.content = st.text_area("", value=st.session_state.content)
+
+    # Detection button
+    if st.button("Deteksi", key="detect_content"):
+        st.session_state.detection_result = predict_hoax(st.session_state.headline, st.session_state.content)
+        st.success(f"Prediksi: {st.session_state.detection_result}")
+
+        # Prepare the text for LIME
+        lime_texts = [f"{st.session_state.headline} [SEP] {st.session_state.content}"]
+
+        # Show a spinner while LIME processes the explanation
+        with st.spinner("Sedang memproses LIME, harap tunggu..."):
+            # Explain the prediction
+            explainer = LimeTextExplainer(class_names=['NON-HOAX', 'HOAX'])
+            explanation = explainer.explain_instance(lime_texts[0], predict_proba_for_lime, num_features=5, num_samples=1000)
+
+            # Save the LIME explanation in session state
+            st.session_state.lime_explanation = explanation.as_html()
+
+    # Display the detection result and LIME explanation if available
+    if st.session_state.lime_explanation:
+        lime_html = st.session_state.lime_explanation
+
+        # Inject CSS for font size adjustment
+        lime_html = f"""
+        <style>
+            .lime-text-explanation, .lime-highlight, .lime-classification,
+            .lime-text-explanation * {{
+                font-size: 14px !important;
+            }}
+        </style>
+        <div class="lime-text-explanation">
+            {lime_html}
+        </div>
+        """
+        components.html(lime_html, height=200, scrolling=True)
+
+    # Display a radio button asking if the detection result is correct
+    if st.session_state.detection_result is not None:
+        st.markdown("<h6 style='font-size: 16px; margin-bottom: -150px;'>Apakah hasil deteksi sudah benar?</h6>", unsafe_allow_html=True)
+        st.session_state.is_correct = st.radio("", ("Ya", "Tidak"))
+
+        if st.session_state.is_correct == "Ya":
+            st.success("Deteksi sudah benar.")
+        else:
+            # Determine the correction based on the prediction
+            st.session_state.correction = "HOAX" if st.session_state.detection_result == "NON-HOAX" else "NON-HOAX"
+
+            # Build the correction record
+            correction_data = [{
+                'Title': st.session_state.headline,
+                'Content': st.session_state.content,
+                'Prediction': st.session_state.detection_result,
+                'Correction': st.session_state.correction,
+                'Timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            }]
+
+            # Save button
+            if st.button("Simpan"):
+                # Save the correction data to GCS
+                save_corrections_to_gcs("your-bucket-name", "koreksi_pengguna.csv", correction_data)
+
+                # Create a formatted string with CSS for alignment and multi-line content handling
+                formatted_text = f"""
+                <div style='font-size: 14px;'>
+                    <p style='margin: 0;'><span style='display: inline-block; width: 120px; font-weight: bold;'>Title</span> : <span style='white-space: pre-wrap;'>{st.session_state.headline}</span></p>
+                    <p style='margin: 0;'><span style='display: inline-block; width: 120px; font-weight: bold;'>Content</span> : <span style='white-space: pre-wrap;'>{st.session_state.content}</span></p>
+                    <p style='margin: 0;'><span style='display: inline-block; width: 120px; font-weight: bold;'>Prediction</span> : {st.session_state.detection_result}</p>
+                    <p style='margin: 0;'><span style='display: inline-block; width: 120px; font-weight: bold;'>Correction</span> : {st.session_state.correction}</p>
+                </div>
+                """
+
+                # Display the correction as text
+                st.markdown(formatted_text, unsafe_allow_html=True)
+                st.success("Koreksi telah disimpan.")
deteksi_upload.py
ADDED
@@ -0,0 +1,170 @@
+import streamlit as st
+import pandas as pd
+from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
+from test import predict_hoax, evaluate_model_performance
+from load_model import load_model
+from styles import COMMON_CSS
+from google.cloud import storage
+from io import StringIO
+import os
+from datetime import datetime
+
+# Set environment variable for Google Cloud credentials
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:/DashboardHoax/inbound-source-431806-g7-e49e388ce0be.json"
+
+def save_corrections_to_gcs(bucket_name, file_name, correction_data):
+    client = storage.Client()  # the bucket_name/file_name arguments are ignored in favor of the hardcoded names below
+    bucket = client.bucket("dashboardhoax-bucket")
+    blob = bucket.blob("koreksi_pengguna_file.csv")
+
+    # Check if the blob (file) exists
+    if blob.exists():
+        # Download existing CSV from GCS
+        existing_data = blob.download_as_string().decode('utf-8')
+        existing_df = pd.read_csv(StringIO(existing_data))
+    else:
+        # Create a new DataFrame if the file does not exist
+        existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])
+
+    # Append the new data to the existing data
+    new_data_df = pd.DataFrame(correction_data)
+    updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
+
+    # Convert the DataFrame back to CSV and upload
+    updated_csv_data = updated_df.to_csv(index=False)
+    blob.upload_from_string(updated_csv_data, content_type='text/csv')
+
+def load_data(file):
+    return pd.read_csv(file)
+
+def show_deteksi_upload():  # renamed from show_deteksi_uploadgcs to match the import in app.py
+    st.markdown(COMMON_CSS, unsafe_allow_html=True)
+
+    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
+    selected_model = st.selectbox(
+        "",
+        [
+            "cahya/bert-base-indonesian-522M",
+            "indobenchmark/indobert-base-p2",
+            "indolem/indobert-base-uncased",
+            "mdhugol/indonesia-bert-sentiment-classification"
+        ],
+        key="model_selector_upload"
+    )
+
+    tokenizer, model = load_model(selected_model)
+
+    st.markdown("<h6 style='font-size: 14px; margin-bottom: -200px;'>Unggah File Disini</h6>", unsafe_allow_html=True)
+    uploaded_file = st.file_uploader("", type="csv")
+
+    if 'df' not in st.session_state:
+        st.session_state.df = None
+
+    if uploaded_file is not None:
+        df = load_data(uploaded_file)
+        df.index = df.index + 1
+
+        st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Diunggah</h6>", unsafe_allow_html=True)
+
+        grid_options = GridOptionsBuilder.from_dataframe(df)
+        grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
+        gridOptions = grid_options.build()
+
+        AgGrid(
+            df,
+            gridOptions=gridOptions,
+            update_mode=GridUpdateMode.VALUE_CHANGED,
+            use_container_width=True
+        )
+
+        if st.button("Deteksi", key="detect_upload"):
+            try:
+                df['Result_Detection'] = df.apply(lambda row: predict_hoax(row['Title'], row['Content']), axis=1)
+                df['Correction'] = False
+                st.session_state.df = df.copy()
+            except Exception as e:
+                st.error(f"Terjadi kesalahan saat deteksi: {e}")
+
+    if st.session_state.df is not None:
+
+        accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
+        performance_text = (
+            f"*Performansi Model*\n\n"
+            f"*Accuracy:* {round(accuracy, 2)} "
+            f"*Precision:* {round(precision, 2)} "
+            f"*Recall:* {round(recall, 2)} "
+            f"*F1 Score:* {round(f1, 2)}"
+        )
+
+        st.success(performance_text)
+
+        st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
+
+        cols = ['Correction', 'Result_Detection'] + [col for col in st.session_state.df.columns if col not in ['Correction', 'Result_Detection', 'Label_id']]
+        df_reordered = st.session_state.df[cols]
+
+        grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
+        grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
+        grid_options.configure_default_column(editable=True, groupable=True)
+        gridOptions = grid_options.build()
+
+        grid_response = AgGrid(
+            st.session_state.df,
+            gridOptions=gridOptions,
+            update_mode=GridUpdateMode.VALUE_CHANGED
+        )
+
+        if grid_response['data'] is not None:
+            edited_df = pd.DataFrame(grid_response['data'])
+            st.session_state.df = edited_df.copy()
+            corrected_df = edited_df[edited_df['Correction']].copy()
+
+            edited_df['Result_Correction'] = edited_df.apply(lambda row:
+                'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
+                ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
+                axis=1
+            )
+
+            st.session_state.df = edited_df.copy()
+
+            if not corrected_df.empty:
+                corrected_df['Result_Correction'] = corrected_df.apply(lambda row:
+                    'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
+                    ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
+                    axis=1
+                )
+
+                # Add Timestamp only for saving
+                corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+                cols = ['Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction']
+                corrected_df_to_display = corrected_df[cols]
+
+                st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
+                st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
+            else:
+                st.write("Tidak ada data yang dikoreksi.")
+
+        if st.button("Simpan", key="corrected_data"):
+            if 'df' in st.session_state:
+                corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
+                corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+                corrected_df = corrected_df.drop(columns=['Correction'])
+
+                if not corrected_df.empty:
+                    # Define GCS bucket and file name
+                    bucket_name = "your-bucket-name"
+                    file_name = "corrected_upload_data.csv"
+
+                    # Convert DataFrame to list of dicts for GCS
+                    correction_data = corrected_df.to_dict(orient='records')
+
+                    # Save corrected data to GCS
+                    save_corrections_to_gcs(bucket_name, file_name, correction_data)
+
+                    st.success("Data telah disimpan.")
+                    st.session_state.corrected_df = corrected_df
+                else:
+                    st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
+            else:
+                st.warning("Data deteksi tidak ditemukan.")
home.py
ADDED
@@ -0,0 +1,285 @@
+import streamlit as st
+import pandas as pd
+import plotly.express as px
+from wordcloud import WordCloud, STOPWORDS
+import matplotlib.pyplot as plt
+
+# Caching data loading
+@st.cache_data
+def load_data():
+    df = pd.read_csv("mafindo_mix_llm.csv")
+    return df
+
+# Caching WordCloud generation
+@st.cache_resource
+def generate_wordcloud(text, colormap, stopwords):
+    wordcloud = WordCloud(width=500, height=200, background_color='white', colormap=colormap, stopwords=stopwords).generate(text)
+    return wordcloud
+
+def show_home():
+    # Load the dataset
+    df = load_data()
+
+    # Convert 'Tanggal' to datetime
+    df['Tanggal'] = pd.to_datetime(df['Tanggal'], format='%d/%m/%Y')
+    df['Year'] = df['Tanggal'].dt.year
+
+    # Convert text columns to string to avoid type errors
+    df['Content'] = df['Content'].astype(str)
+
+    # Define additional stopwords
+    additional_stopwords = {"dan", "di", "yang", "ke", "dari", "untuk", "pada", "adalah", "sebuah", "dengan", "tersebut", "ini", "itu", "atau", "dalam", "juga", "yg", "tapi"}
+
+    # Combine default stopwords with additional stopwords
+    combined_stopwords = set(STOPWORDS).union(additional_stopwords)
+
+
+    # Row with 4 visualizations
+    col1, col2, col3, col4 = st.columns([1.5, 2.5, 1.5, 2.5])
+
+    # Visualization 1: Bar chart for Hoax vs Non-Hoax using Plotly
+    with col1:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Hoax vs Non-Hoax</h6>", unsafe_allow_html=True)
+        df_label_counts = df['Label'].value_counts().reset_index()
+        df_label_counts.columns = ['Label', 'Jumlah']
+        bar_chart_label = px.bar(df_label_counts, x='Label', y='Jumlah', color='Label',
+                                 color_discrete_map={'HOAX': 'red', 'NON-HOAX': 'green'})
+        bar_chart_label.update_layout(
+            width=200, height=150, xaxis_title='Label', yaxis_title='Jumlah',
+            xaxis_title_font_size=10, yaxis_title_font_size=10,
+            xaxis_tickfont_size=8, yaxis_tickfont_size=8, margin=dict(t=10, b=10, l=10, r=10),
+            showlegend=False
+        )
+        st.plotly_chart(bar_chart_label, use_container_width=False)
+
+    # Visualization 2: Bar chart for Hoax vs Non-Hoax per Data Source using Plotly
+    with col2:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Hoax vs Non-Hoax per Data Source</h6>", unsafe_allow_html=True)
+        datasource_label_counts = df.groupby(['Datasource', 'Label']).size().reset_index(name='counts')
+        fig_datasource = px.bar(datasource_label_counts, x='Datasource', y='counts', color='Label', barmode='group',
+                                color_discrete_map={'HOAX': 'red', 'NON-HOAX': 'green'})
+        fig_datasource.update_layout(
+            width=500, height=150, xaxis_title='Datasource', yaxis_title='Jumlah',
+            xaxis_title_font_size=10, yaxis_title_font_size=10,
+            xaxis_tickfont_size=6, yaxis_tickfont_size=8, xaxis_tickangle=0,
+            margin=dict(t=10, b=10, l=10, r=50),
+            legend=dict(
+                font=dict(size=8),  # Smaller font size for the legend
+                traceorder='normal',
+                orientation='v',  # Vertical orientation of the legend
+                title_text='Label',  # Title for the legend
+                yanchor='top', y=1, xanchor='left', x=1.05,  # Adjust position of the legend
+                bgcolor='rgba(255, 255, 255, 0)',  # Transparent background for legend
+                bordercolor='rgba(0, 0, 0, 0)'  # No border color
+            ),
+            showlegend=True
+        )
+        st.plotly_chart(fig_datasource, use_container_width=False)
+
+    # Visualization 3: Line chart for Hoax per Year using Plotly
+    with col3:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Hoax per Tahun</h6>", unsafe_allow_html=True)
+
+        # Filter data to include only years up to 2023
+        hoax_per_year = df[(df['Label'] == 'HOAX') & (df['Year'] <= 2023)].groupby('Year').size().reset_index(name='count')
+
+        line_chart_hoax = px.line(hoax_per_year, x='Year', y='count', line_shape='linear',
+                                  color_discrete_sequence=['red'])
+        line_chart_hoax.update_layout(
+            width=200, height=150, xaxis_title='Tahun', yaxis_title='Jumlah Hoax',
+            xaxis_title_font_size=10, yaxis_title_font_size=10,
+            xaxis_tickfont_size=8, yaxis_tickfont_size=8, margin=dict(t=10, b=10, l=10, r=10),
+            showlegend=False
+        )
+        st.plotly_chart(line_chart_hoax, use_container_width=False)
+
+
+    # Visualization 4: Bar chart for Topics per Year using Plotly
+    with col4:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Topik per Tahun</h6>", unsafe_allow_html=True)
+        df['Tanggal'] = pd.to_datetime(df['Tanggal'], format='%d/%m/%Y')
+        df['Year'] = df['Tanggal'].dt.year
+
+        # Filter the data to include only years up to 2023
+        df_mafindo_filtered = df[df['Year'] <= 2023]
+
+        topics_per_year = df_mafindo_filtered.groupby(['Year', 'Topic']).size().reset_index(name='count')
+
+        # Create the vertical bar chart
+        bar_chart_topics = px.bar(topics_per_year, x='Year', y='count', color='Topic',
+                                  color_continuous_scale=px.colors.sequential.Viridis)
+
+        # Update layout to adjust the legend
+        bar_chart_topics.update_layout(
+            width=600, height=150, xaxis_title='Tahun', yaxis_title='Jumlah Topik',
+            xaxis_title_font_size=10, yaxis_title_font_size=10,
+            xaxis_tickfont_size=8, yaxis_tickfont_size=8, margin=dict(t=10, b=10, l=10, r=10),
+            showlegend=True,
+            legend=dict(
+                yanchor="top", y=1, xanchor="left", x=1.02,  # Adjust position of the legend
+                bgcolor='rgba(255, 255, 255, 0)',  # Transparent background for legend
+                bordercolor='rgba(0, 0, 0, 0)',  # No border color
+                itemclick='toggleothers',  # Allow toggling of legend items
+                itemsizing='constant',  # Consistent sizing for legend items
+                font=dict(size=8),
+                traceorder='normal',
+                orientation='v',  # Vertical orientation of legend
+                title_text='Topic'
+            )
+        )
+
+        st.plotly_chart(bar_chart_topics, use_container_width=True)
+
+
+    # Create a new row for WordCloud visualizations
+    col5, col6, col7 = st.columns([2, 2.5, 2.5])
+
+    # Wordcloud for Hoax
+    with col5:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Wordcloud for Hoax</h6>", unsafe_allow_html=True)
+        hoax_text = ' '.join(df[df['Label'] == 'HOAX']['Content'])
+        wordcloud_hoax = generate_wordcloud(hoax_text, 'Reds', combined_stopwords)
+        fig_hoax = plt.figure(figsize=(5, 2.5))
+        plt.imshow(wordcloud_hoax, interpolation='bilinear')
+        plt.axis('off')
+        st.pyplot(fig_hoax)
+
+    with col6:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Klasifikasi</h6>", unsafe_allow_html=True)
+        df_classification_counts = df['Classification'].value_counts().reset_index()
+        df_classification_counts.columns = ['Classification', 'Count']
+
+        # Create the donut chart
+        donut_chart_classification = px.pie(df_classification_counts, names='Classification', values='Count',
+                                            hole=0.3, color_discrete_sequence=px.colors.qualitative.Set2)
+
+        # Update layout to move the legend and adjust its size
+        donut_chart_classification.update_layout(
+            width=300, height=170,  # Adjust the size of the chart
+            margin=dict(t=20, b=20, l=20, r=120),  # Adjust margins to make room for the legend
+            legend=dict(
+                yanchor="top", y=1, xanchor="left", x=1.07,  # Adjust position of the legend
+                bgcolor='rgba(255, 255, 255, 0)',  # Transparent background for legend
+                bordercolor='rgba(0, 0, 0, 0)',  # No border color
+                itemclick='toggleothers',  # Allow toggling of legend items
+                itemsizing='constant',  # Consistent sizing for legend items
+                font=dict(size=8),  # Smaller font size for the legend
+                traceorder='normal',
+                orientation='v',  # Vertical legend
+                title_text='Classification'  # Title for the legend
+            )
+        )
+        st.plotly_chart(donut_chart_classification, use_container_width=True)
+
+    with col7:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Tone</h6>", unsafe_allow_html=True)
+        df_tone_counts = df['Tone'].value_counts().reset_index()
+        df_tone_counts.columns = ['Tone', 'Count']
+
+        # Create the donut chart
+        donut_chart_tone = px.pie(df_tone_counts, names='Tone', values='Count',
+                                  hole=0.3, color_discrete_sequence=px.colors.qualitative.Set2)
+
+        # Update layout to move the legend and adjust its size
+        donut_chart_tone.update_layout(
+            width=250, height=170,  # Adjust the size of the chart
+            margin=dict(t=20, b=20, l=20, r=100),  # Adjust margins to make room for the legend
+            legend=dict(
+                yanchor="top", y=1, xanchor="left", x=1.07,  # Adjust position of the legend
+                bgcolor='rgba(255, 255, 255, 0)',  # Transparent background for legend
+                bordercolor='rgba(0, 0, 0, 0)',  # No border color
+                itemclick='toggleothers',  # Allow toggling of legend items
+                itemsizing='constant',  # Consistent sizing for legend items
+                font=dict(size=8),  # Smaller font size for the legend
+                traceorder='normal',
+                orientation='v',  # Vertical legend
+                title_text='Tone'  # Title for the legend
+            )
+        )
+        st.plotly_chart(donut_chart_tone, use_container_width=True)
+
+    # Evaluation Metrics Table
+    data = [
+        ["indobenchmark/indobert-base-p2", 0.6898, 0.9793, 0.8094, 0.8400, 0.1981, 0.3206, 0.7023],
+        ["cahya/bert-base-indonesian-522M", 0.7545, 0.8756, 0.8106, 0.6800, 0.4811, 0.5635, 0.7358],
+        ["indolem/indobert-base-uncased", 0.7536, 0.8238, 0.7871, 0.6136, 0.5094, 0.5567, 0.7124],
+        ["mdhugol/indonesia-bert-sentiment-classification", 0.7444, 0.8601, 0.7981, 0.6447, 0.4623, 0.5385, 0.7191]
+    ]
+
+    highest_accuracy = max(data, key=lambda x: x[-1])
+
+    # Table header
+    html_table = """
+    <table style="width:100%; border-collapse: collapse; font-size: 12px;">
+        <tr>
+            <th rowspan="2" style="border: 1px solid black; padding: 5px; font-size: 14px; text-align: center;">Pre-trained Model</th>
+            <th colspan="3" style="border: 1px solid black; padding: 5px; font-size: 14px; text-align: center;">NON-HOAX</th>
+            <th colspan="3" style="border: 1px solid black; padding: 5px; font-size: 14px; text-align: center;">HOAX</th>
+            <th rowspan="2" style="border: 1px solid black; padding: 5px; font-size: 14px; text-align: center;">Accuracy</th>
+        </tr>
+        <tr>
+            <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">Precision</th>
+            <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">Recall</th>
+            <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">F1-Score</th>
+            <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">Precision</th>
+            <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">Recall</th>
+            <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">F1-Score</th>
+        </tr>
+    """
+    # Table body: highlight the row with the highest accuracy
+    for row in data:
+        if row == highest_accuracy:
+            html_table += "<tr style='background-color: #41B3A2; font-size: 12px;'>"
+        else:
+            html_table += "<tr style='font-size: 12px;'>"
+        for item in row:
+            html_table += f"<td style='border: 1px solid black; padding: 5px; font-size: 12px;'>{item}</td>"
+        html_table += "</tr>"
+
+    html_table += "</table>"
+    # Display the table in Streamlit
+    col8 = st.columns([5])
+    with col8[0]:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Evaluation Metrics</h6>", unsafe_allow_html=True)
+        st.markdown(html_table, unsafe_allow_html=True)
+
+    html_table_col9 = """
+    <div style='text-align: center;'>
+        <table style="width: 100%; margin: -5px 0; font-size: 12px; border-collapse: collapse; border: 1px solid black;">
+            <thead>
+                <tr style="background-color: #e0e0e0;">
+                    <th style="padding: 8px; border: 1px solid black; font-weight: bold;">Label</th>
+                    <th style="padding: 8px; border: 1px solid black; font-weight: bold;">Train</th>
+                    <th style="padding: 8px; border: 1px solid black; font-weight: bold;">Test</th>
+                    <th style="padding: 8px; border: 1px solid black; font-weight: bold;">Dev</th>
+                </tr>
+            </thead>
+            <tbody>
+                <tr style="border-bottom: 1px solid black;">
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">HOAX</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">11,563</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">193</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">193</td>
+                </tr>
+                <tr style="border-bottom: 1px solid black;">
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">NON-HOAX</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">789</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">106</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">106</td>
+                </tr>
+                <tr style="font-weight: bold; border-top: 1px solid black;">
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">TOTAL</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">12,352</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">299</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">299</td>
+                </tr>
+            </tbody>
+        </table>
+    </div>
+    """
+
+    # Display the table in col9 using HTML
+    col9 = st.columns([1])  # Adjust the number and width of columns as needed
+    with col9[0]:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Statistik Data</h6>", unsafe_allow_html=True)
+        st.markdown(html_table_col9, unsafe_allow_html=True)
inbound-source-431806-g7-e49e388ce0be.json
ADDED
@@ -0,0 +1,13 @@
+{
+  "type": "service_account",
+  "project_id": "inbound-source-431806-g7",
+  "private_key_id": "e49e388ce0bed9704aedad42a56d8e3982e0120f",
+  "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC2eXeqNjR8Gaeb\npUekLAbieWiqvxGak71OFj4t1/fBemDduW1tsjN1biZzosJR3KERpIWQ0z0vbDwt\n3zOuvdf7XxEZ09Iopp3TRqk/qrrAQzanLAhkAJ8K8czNAlkaZ3KYW2j9bU+xk4P8\nFNpoHZwFZJczLujFxULxIU90KGqigXdvkdvyevfH1mxLlCuXL6F6bFsHuF8ckt2Q\nWKQ4bVnHW8w6CymhmJVgFxX68HxoTbObeoaRzkd5kjJvdJ+A4MQbdzLyHxlPxxZn\nme1LICIQKlsUrk6MSHzMcrl8BN0lMj8k6DgIO2WD/uCXRwemAJl3YoJc5BZN2Luz\nI0DTsDB1AgMBAAECggEANEVga6BicYhR1IrIlnVMNZUM0BiyvMKEkHlbr3s1zDU3\nyVwkRi+tgP6gQjDGFHgspaao4j84wDxzkrplDjHwzF/DwM/GXIG6JTsRIZ1RKOE4\nJzQ8ZRUueg6hGbsJ9j/a+lz5Gtu04Av/W3dHx1pwBrV1gKJ36KtkzTk7Du3C+jC6\n4fQeTim1ebrIkj3Hu88lV4cDMJENWDnHFXj0ww8SygaNDoT7X9E96iRwubMiE7AK\nI8JNJqFsO1S7nQPDbp4KceQwjVOWE1djTxtxYhcFAGOSacKVKcLz0mcQstXvUzns\nUfIj1+2l8dxRsHMRkg+bllD99aXJ2PrLKBp13ia90wKBgQDqKAVBtlATOFWtsdmG\nPJ6i1+SzzokuGoQO7UwweLtiGXMelr1rdmGTbtsg+/OQC2sFfDJtyKM2xhcXc07U\nxGQZGrPaGmVEAMDujayxgEyw46Dm8H9phGUekoAO8dsSRHynZ4KIGULtZZ/jasYp\nnHJOFVUeL9libv2hdyBC2zPWfwKBgQDHfzZdBKI9/OVo9S69CCoJ+lXs+n7H0/H5\n1wXLYcVfurVs4p+AGXA+F+bZJGFnrYWUwTS8DbB4cTISCURyanxd/IU22qfjp646\nJPTpMLefdqRf01x5jxOHt3NbWTwOWQL/jCoC10VaIeY0jAWRcpYpGj/lbNenyQB6\nQWO8GyeHCwKBgQC1EgOWoBvl8P9YVRqoEoJ93MNvQ/yS2VBblqb/KK2Gm7WI5vpN\nenrUHrp3FD5xmlLFKBh7CtcjySUcLj+8iq35N8vykczTPF31Wzs6+8LSWwQW8c0l\nVIs5jAJZDC/jPXDDp2iqRBacK6TroKrijKdbuGVc9ZV95+RcExmweX/pkwKBgQCI\nSyry5cWKIAsDZ+6kir1dz7+Ahaq0DuLUU8jLqGJWApMMbs+VjsuWQHIgi7BYSr5m\nYJEMoTWdM4iHtfkjSgjplSnVzhDBgb+QTctcvUHWGhI2vYoCKnOnVvfiwtY63ykj\nOblB85yX9Wz3HWp4chaQwjRBI9k58iL3Y1EmJE8e/QKBgF1HJKXaaXNogVHN/O5+\nh5YvWAQlWkmfL9sD89Gt1regkd+DM/Vfx+0yPuCgfopmOc72WO5gMQ6TlcH+MRQS\nPc2O7cHbit2IxsKfYYANOLjfhXAiIYC+yvArdzTwn53Wni+USnFH1YD1XUV8wTGj\nT0XsKoxnGUq4twTDK6re0oRl\n-----END PRIVATE KEY-----\n",
+  "client_email": "dashboardhoax-service-account@inbound-source-431806-g7.iam.gserviceaccount.com",
+  "client_id": "110233701696815226341",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://oauth2.googleapis.com/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/dashboardhoax-service-account%40inbound-source-431806-g7.iam.gserviceaccount.com",
+  "universe_domain": "googleapis.com"
+}
load_model.py
ADDED
@@ -0,0 +1,19 @@
+from transformers import BertTokenizer, BertForSequenceClassification
+import streamlit as st
+
+# Dictionary to map model names to their paths
+model_paths = {
+    "cahya/bert-base-indonesian-522M": "Nakhwa/cahyabert",
+    "indobenchmark/indobert-base-p2": "Nakhwa/indobenchmark",
+    "indolem/indobert-base-uncased": "Nakhwa/indolem",
+    "mdhugol/indonesia-bert-sentiment-classification": "Nakhwa/mdhugol"
+}
+
+# Function to load the selected model
+@st.cache_resource
+def load_model(model_name):
+    path = model_paths[model_name]
+    tokenizer = BertTokenizer.from_pretrained(path)
+    model = BertForSequenceClassification.from_pretrained(path)
+    model.eval()
+    return tokenizer, model
mafindo_mix_llm.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cf40365bdcaf731eadc84f7c1622c75763d7277631749bed31adad6e90ff8e6
+size 19922497
requirements.txt
ADDED
@@ -0,0 +1,11 @@
+streamlit==1.37.1
+pandas==2.2.2
+plotly==5.13.0
+wordcloud==1.9.3
+matplotlib==3.9.2
+lime==0.2.0.1
+torch==2.3.1
+numpy==1.26.4
+transformers==4.41.2
+streamlit-aggrid==1.0.5
+scikit-learn==1.5.1
styles.py
ADDED
@@ -0,0 +1,43 @@
+# styles.py
+
+COMMON_CSS = """
+<style>
+    .stSelectbox div[data-baseweb="select"] {
+        margin-top: -35px;
+    }
+    .stTextInput div[data-baseweb="input"] {
+        margin-top: -35px;
+    }
+    .stTextArea div[data-baseweb="textarea"] {
+        margin-top: -35px;
+    }
+    .stFileUploader div[data-baseweb="input"] {
+        margin-top: -100px;
+    }
+    .stSelectbox {
+        max-width: 300px;
+    }
+    .stTextInput, .stTextArea {
+        max-width: 1400px;
+    }
+    .stSelectbox div, .stTextInput input, .stTextArea textarea {
+        font-size: 14px;
+    }
+    .stButton > button {
+        font-size: 6px;
+        padding: 2px 8px;
+        border-radius: 10px;
+        background-color: #1560BD;
+        color: white;
+    }
+    .stButton > button:hover {
+        background-color: #1560BD;
+        border: none;
+        outline: none;
+    }
+    .stRadio div[data-baseweb="radio"] {
+        font-size: 14px;  /* Ensure font size for the entire radio button group */
+        margin-top: -100px;  /* Reduce margin between label and radio button */
+    }
+</style>
+"""
test.py
ADDED
@@ -0,0 +1,63 @@
+import torch
+from torch.nn.functional import softmax
+from load_model import load_model  # Import the load_model function
+import numpy as np
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+import streamlit as st
+
+@st.cache_resource
+def get_model_and_tokenizer(model_name):
+    return load_model(model_name)  # currently unused helper; the functions below use the module-level defaults
+
+# Initialize the default model; note that predict_hoax and predict_proba_for_lime always use these module-level globals
+default_model_name = "cahya/bert-base-indonesian-522M"
+tokenizer, model = load_model(default_model_name)
+
+# Prediction function
+def predict_hoax(title, content):
+    if tokenizer is None or model is None:
+        raise ValueError("Model and tokenizer must be loaded before prediction.")
+
+    print(f"Using model: {model}")
+    print(f"Using tokenizer: {tokenizer}")
+
+    text = f"{title} [SEP] {content}"
+    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    probs = softmax(outputs.logits, dim=1)
+    pred = torch.argmax(probs, dim=1).item()
+    label = 'HOAX' if pred == 1 else 'NON-HOAX'
+    return label
+
+# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# model.to(device)
+
+# LIME prediction function
+def predict_proba_for_lime(texts):
+    results = []
+    for text in texts:
+        inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256)
+        with torch.no_grad():
+            outputs = model(**inputs)
+        probs = softmax(outputs.logits, dim=1).detach().cpu().numpy()
+        results.append(probs[0])
+    return np.array(results)
+
+def evaluate_model_performance(df, tokenizer, model):
+    true_labels = []
+    pred_labels = []
+
+    for index, row in df.iterrows():
+        true_label = row['Label']  # 'Label' holds the ground-truth class for each row
+        pred_label = predict_hoax(row['Title'], row['Content'])
+
+        true_labels.append(1 if true_label == 'HOAX' else 0)
+        pred_labels.append(1 if pred_label == 'HOAX' else 0)
+
+    accuracy = accuracy_score(true_labels, pred_labels)
+    precision = precision_score(true_labels, pred_labels, average='binary')
+    recall = recall_score(true_labels, pred_labels, average='binary')
+    f1 = f1_score(true_labels, pred_labels, average='binary')
+
+    return accuracy, precision, recall, f1