Spaces:
Running
Running
Update deteksi_upload.py
Browse files- deteksi_upload.py +170 -170
deteksi_upload.py
CHANGED
@@ -1,170 +1,170 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import pandas as pd
|
3 |
-
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
|
4 |
-
from test import predict_hoax, evaluate_model_performance
|
5 |
-
from load_model import load_model
|
6 |
-
from styles import COMMON_CSS
|
7 |
-
from google.cloud import storage
|
8 |
-
from io import StringIO
|
9 |
-
import os
|
10 |
-
from datetime import datetime
|
11 |
-
|
12 |
-
# Set environment variable for Google Cloud credentials
|
13 |
-
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:/DashboardHoax/inbound-source-431806-g7-e49e388ce0be.json"
|
14 |
-
|
15 |
-
def save_corrections_to_gcs(bucket_name, file_name, correction_data):
|
16 |
-
client = storage.Client()
|
17 |
-
bucket = client.bucket("dashboardhoax-bucket")
|
18 |
-
blob = bucket.blob("koreksi_pengguna_file.csv")
|
19 |
-
|
20 |
-
# Check if the blob (file) exists
|
21 |
-
if blob.exists():
|
22 |
-
# Download existing CSV from GCS
|
23 |
-
existing_data = blob.download_as_string().decode('utf-8')
|
24 |
-
existing_df = pd.read_csv(StringIO(existing_data))
|
25 |
-
else:
|
26 |
-
# Create a new DataFrame if the file does not exist
|
27 |
-
existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])
|
28 |
-
|
29 |
-
# Append the new data to the existing data
|
30 |
-
new_data_df = pd.DataFrame(correction_data)
|
31 |
-
updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
|
32 |
-
|
33 |
-
# Convert the DataFrame back to CSV and upload
|
34 |
-
updated_csv_data = updated_df.to_csv(index=False)
|
35 |
-
blob.upload_from_string(updated_csv_data, content_type='text/csv')
|
36 |
-
|
37 |
-
def load_data(file):
|
38 |
-
return pd.read_csv(file)
|
39 |
-
|
40 |
-
def
|
41 |
-
st.markdown(COMMON_CSS, unsafe_allow_html=True)
|
42 |
-
|
43 |
-
st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
|
44 |
-
selected_model = st.selectbox(
|
45 |
-
"",
|
46 |
-
[
|
47 |
-
"cahya/bert-base-indonesian-522M",
|
48 |
-
"indobenchmark/indobert-base-p2",
|
49 |
-
"indolem/indobert-base-uncased",
|
50 |
-
"mdhugol/indonesia-bert-sentiment-classification"
|
51 |
-
],
|
52 |
-
key="model_selector_upload"
|
53 |
-
)
|
54 |
-
|
55 |
-
tokenizer, model = load_model(selected_model)
|
56 |
-
|
57 |
-
st.markdown("<h6 style='font-size: 14px; margin-bottom: -200px;'>Unggah File Disini</h6>", unsafe_allow_html=True)
|
58 |
-
uploaded_file = st.file_uploader("", type="csv")
|
59 |
-
|
60 |
-
if 'df' not in st.session_state:
|
61 |
-
st.session_state.df = None
|
62 |
-
|
63 |
-
if uploaded_file is not None:
|
64 |
-
df = load_data(uploaded_file)
|
65 |
-
df.index = df.index + 1
|
66 |
-
|
67 |
-
st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Diunggah</h6>", unsafe_allow_html=True)
|
68 |
-
|
69 |
-
grid_options = GridOptionsBuilder.from_dataframe(df)
|
70 |
-
grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
|
71 |
-
gridOptions = grid_options.build()
|
72 |
-
|
73 |
-
AgGrid(
|
74 |
-
df,
|
75 |
-
gridOptions=gridOptions,
|
76 |
-
update_mode=GridUpdateMode.VALUE_CHANGED,
|
77 |
-
use_container_width=True
|
78 |
-
)
|
79 |
-
|
80 |
-
if st.button("Deteksi", key="detect_upload"):
|
81 |
-
try:
|
82 |
-
df['Result_Detection'] = df.apply(lambda row: predict_hoax(row['Title'], row['Content']), axis=1)
|
83 |
-
df['Correction'] = False
|
84 |
-
st.session_state.df = df.copy()
|
85 |
-
except Exception as e:
|
86 |
-
st.error(f"Terjadi kesalahan saat deteksi: {e}")
|
87 |
-
|
88 |
-
if st.session_state.df is not None:
|
89 |
-
|
90 |
-
accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
|
91 |
-
performance_text = (
|
92 |
-
f"*Performansi Model*\n\n"
|
93 |
-
f"*Accuracy:* {round(accuracy, 2)} "
|
94 |
-
f"*Precision:* {round(precision, 2)} "
|
95 |
-
f"*Recall:* {round(recall, 2)} "
|
96 |
-
f"*F1 Score:* {round(f1, 2)}"
|
97 |
-
)
|
98 |
-
|
99 |
-
st.success(performance_text)
|
100 |
-
|
101 |
-
st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
|
102 |
-
|
103 |
-
cols = ['Correction', 'Result_Detection'] + [col for col in st.session_state.df.columns if col not in ['Correction', 'Result_Detection', 'Label_id']]
|
104 |
-
df_reordered = st.session_state.df[cols]
|
105 |
-
|
106 |
-
grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
|
107 |
-
grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
|
108 |
-
grid_options.configure_default_column(editable=True, groupable=True)
|
109 |
-
gridOptions = grid_options.build()
|
110 |
-
|
111 |
-
grid_response = AgGrid(
|
112 |
-
st.session_state.df,
|
113 |
-
gridOptions=gridOptions,
|
114 |
-
update_mode=GridUpdateMode.VALUE_CHANGED
|
115 |
-
)
|
116 |
-
|
117 |
-
if grid_response['data'] is not None:
|
118 |
-
edited_df = pd.DataFrame(grid_response['data'])
|
119 |
-
st.session_state.df = edited_df.copy()
|
120 |
-
corrected_df = edited_df[edited_df['Correction']].copy()
|
121 |
-
|
122 |
-
edited_df['Result_Correction'] = edited_df.apply(lambda row:
|
123 |
-
'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
|
124 |
-
('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
|
125 |
-
axis=1
|
126 |
-
)
|
127 |
-
|
128 |
-
st.session_state.df = edited_df.copy()
|
129 |
-
|
130 |
-
if not corrected_df.empty:
|
131 |
-
corrected_df['Result_Correction'] = corrected_df.apply(lambda row:
|
132 |
-
'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
|
133 |
-
('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
|
134 |
-
axis=1
|
135 |
-
)
|
136 |
-
|
137 |
-
# Add Timestamp only for saving
|
138 |
-
corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
139 |
-
|
140 |
-
cols = ['Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction']
|
141 |
-
corrected_df_to_display = corrected_df[cols]
|
142 |
-
|
143 |
-
st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
|
144 |
-
st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
|
145 |
-
else:
|
146 |
-
st.write("Tidak ada data yang dikoreksi.")
|
147 |
-
|
148 |
-
if st.button("Simpan", key="corrected_data"):
|
149 |
-
if 'df' in st.session_state:
|
150 |
-
corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
|
151 |
-
corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
152 |
-
corrected_df = corrected_df.drop(columns=['Correction'])
|
153 |
-
|
154 |
-
if not corrected_df.empty:
|
155 |
-
# Define GCS bucket and file name
|
156 |
-
bucket_name = "your-bucket-name"
|
157 |
-
file_name = "corrected_upload_data.csv"
|
158 |
-
|
159 |
-
# Convert DataFrame to list of dicts for GCS
|
160 |
-
correction_data = corrected_df.to_dict(orient='records')
|
161 |
-
|
162 |
-
# Save corrected data to GCS
|
163 |
-
save_corrections_to_gcs(bucket_name, file_name, correction_data)
|
164 |
-
|
165 |
-
st.success("Data telah disimpan.")
|
166 |
-
st.session_state.corrected_df = corrected_df
|
167 |
-
else:
|
168 |
-
st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
|
169 |
-
else:
|
170 |
-
st.warning("Data deteksi tidak ditemukan.")
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
|
4 |
+
from test import predict_hoax, evaluate_model_performance
|
5 |
+
from load_model import load_model
|
6 |
+
from styles import COMMON_CSS
|
7 |
+
from google.cloud import storage
|
8 |
+
from io import StringIO
|
9 |
+
import os
|
10 |
+
from datetime import datetime
|
11 |
+
|
12 |
+
# Set environment variable for Google Cloud credentials
# NOTE(review): hard-coded absolute Windows path to a service-account key file.
# This is machine-specific and keeps a credential reference in source control —
# presumably dev-only; confirm and move to deployment configuration.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:/DashboardHoax/inbound-source-431806-g7-e49e388ce0be.json"
|
14 |
+
|
15 |
+
def save_corrections_to_gcs(bucket_name, file_name, correction_data):
    """Append user-correction rows to the corrections CSV stored in GCS.

    Downloads the existing CSV (if present), appends ``correction_data``,
    and re-uploads the merged file.

    Args:
        bucket_name: Currently IGNORED — the bucket is hard-coded below.
        file_name: Currently IGNORED — the blob name is hard-coded below.
        correction_data: List of row dicts matching the corrections schema.
    """
    client = storage.Client()
    # NOTE(review): the bucket_name/file_name parameters are ignored and the
    # real targets are hard-coded (the caller passes placeholders such as
    # "your-bucket-name", so honoring the parameters today would write to a
    # nonexistent bucket). Kept as-is to preserve behavior — TODO: pass the
    # real names from the caller and use the parameters here.
    bucket = client.bucket("dashboardhoax-bucket")
    blob = bucket.blob("koreksi_pengguna_file.csv")

    # NOTE(review): read-modify-write is not atomic; concurrent saves can
    # drop rows. Confirm single-writer usage.
    if blob.exists():
        # download_as_text() replaces the deprecated
        # download_as_string().decode('utf-8') — identical result.
        existing_df = pd.read_csv(StringIO(blob.download_as_text()))
    else:
        # First save: start from an empty frame with the expected columns.
        existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])

    # Append the new data to the existing data.
    new_data_df = pd.DataFrame(correction_data)
    updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)

    # Convert the DataFrame back to CSV and upload.
    updated_csv_data = updated_df.to_csv(index=False)
    blob.upload_from_string(updated_csv_data, content_type='text/csv')
|
36 |
+
|
37 |
+
def load_data(file):
    """Parse an uploaded CSV file-like object into a pandas DataFrame."""
    frame = pd.read_csv(file)
    return frame
|
39 |
+
|
40 |
+
def show_deteksi_upload():
    """Render the file-upload hoax-detection page.

    Flow: choose a model, upload a CSV, run detection over each row, show
    aggregate metrics, let the user tick rows to correct, and persist the
    corrected rows to GCS.

    NOTE(review): the block nesting below was reconstructed from a flattened
    diff view with indentation stripped — confirm against the original file.
    """
    st.markdown(COMMON_CSS, unsafe_allow_html=True)

    # Model picker; the chosen checkpoint is (re)loaded on every rerun.
    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
    selected_model = st.selectbox(
        "",
        [
            "cahya/bert-base-indonesian-522M",
            "indobenchmark/indobert-base-p2",
            "indolem/indobert-base-uncased",
            "mdhugol/indonesia-bert-sentiment-classification"
        ],
        key="model_selector_upload"
    )

    tokenizer, model = load_model(selected_model)

    st.markdown("<h6 style='font-size: 14px; margin-bottom: -200px;'>Unggah File Disini</h6>", unsafe_allow_html=True)
    uploaded_file = st.file_uploader("", type="csv")

    # Detection results must survive Streamlit reruns, so they live in
    # session_state rather than a local variable.
    if 'df' not in st.session_state:
        st.session_state.df = None

    if uploaded_file is not None:
        df = load_data(uploaded_file)
        df.index = df.index + 1  # 1-based row numbers for display

        st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Diunggah</h6>", unsafe_allow_html=True)

        # Read-only paginated preview of the raw upload.
        grid_options = GridOptionsBuilder.from_dataframe(df)
        grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
        gridOptions = grid_options.build()

        AgGrid(
            df,
            gridOptions=gridOptions,
            update_mode=GridUpdateMode.VALUE_CHANGED,
            use_container_width=True
        )

        if st.button("Deteksi", key="detect_upload"):
            try:
                # Label every row; results are compared against the strings
                # 'HOAX'/'NON-HOAX' further below.
                # NOTE(review): predict_hoax is called without tokenizer/model —
                # presumably it reads module-level state in test.py; confirm.
                df['Result_Detection'] = df.apply(lambda row: predict_hoax(row['Title'], row['Content']), axis=1)
                df['Correction'] = False  # user-editable "correct this row" flag
                st.session_state.df = df.copy()
            except Exception as e:
                st.error(f"Terjadi kesalahan saat deteksi: {e}")

        if st.session_state.df is not None:

            # Aggregate metrics over the uploaded (labelled) data.
            accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
            performance_text = (
                f"*Performansi Model*\n\n"
                f"*Accuracy:* {round(accuracy, 2)} "
                f"*Precision:* {round(precision, 2)} "
                f"*Recall:* {round(recall, 2)} "
                f"*F1 Score:* {round(f1, 2)}"
            )

            st.success(performance_text)

            st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)

            # Surface the editable Correction flag and detection result first.
            cols = ['Correction', 'Result_Detection'] + [col for col in st.session_state.df.columns if col not in ['Correction', 'Result_Detection', 'Label_id']]
            df_reordered = st.session_state.df[cols]

            # Editable grid so the user can toggle 'Correction' per row.
            # NOTE(review): options are built from df_reordered but the grid is
            # fed st.session_state.df — displayed column order may not match
            # the reordering; confirm intended.
            grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
            grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
            grid_options.configure_default_column(editable=True, groupable=True)
            gridOptions = grid_options.build()

            grid_response = AgGrid(
                st.session_state.df,
                gridOptions=gridOptions,
                update_mode=GridUpdateMode.VALUE_CHANGED
            )

            if grid_response['data'] is not None:
                edited_df = pd.DataFrame(grid_response['data'])
                st.session_state.df = edited_df.copy()
                # Subset of rows the user flagged for correction.
                corrected_df = edited_df[edited_df['Correction']].copy()

                # Flip the detected label on flagged rows; keep it otherwise.
                edited_df['Result_Correction'] = edited_df.apply(lambda row:
                    'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
                    ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
                    axis=1
                )

                # Persist the version that includes Result_Correction.
                st.session_state.df = edited_df.copy()

                if not corrected_df.empty:
                    # Same label-flip, applied to the flagged subset
                    # (corrected_df was split off before the flip above).
                    corrected_df['Result_Correction'] = corrected_df.apply(lambda row:
                        'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
                        ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
                        axis=1
                    )

                    # Add Timestamp only for saving
                    corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

                    cols = ['Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction']
                    corrected_df_to_display = corrected_df[cols]

                    st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
                    st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
                else:
                    st.write("Tidak ada data yang dikoreksi.")

                if st.button("Simpan", key="corrected_data"):
                    if 'df' in st.session_state:
                        # Recompute the flagged rows from session state and
                        # stamp them at save time.
                        corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
                        corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                        corrected_df = corrected_df.drop(columns=['Correction'])

                        if not corrected_df.empty:
                            # Define GCS bucket and file name
                            # NOTE(review): placeholder values — the saver
                            # ignores them and writes to hard-coded targets.
                            bucket_name = "your-bucket-name"
                            file_name = "corrected_upload_data.csv"

                            # Convert DataFrame to list of dicts for GCS
                            correction_data = corrected_df.to_dict(orient='records')

                            # Save corrected data to GCS
                            save_corrections_to_gcs(bucket_name, file_name, correction_data)

                            st.success("Data telah disimpan.")
                            st.session_state.corrected_df = corrected_df
                        else:
                            st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
                    else:
                        st.warning("Data deteksi tidak ditemukan.")
|