Spaces:
Running
Running
File size: 7,231 Bytes
b708f61 38d5029 b708f61 76e3ff9 b708f61 fc8b513 b708f61 fc8b513 b708f61 fc8b513 b708f61 fc8b513 b708f61 fc8b513 b708f61 fc8b513 38d5029 b708f61 fc8b513 38d5029 b708f61 38d5029 b708f61 fc8b513 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
import streamlit as st
import pandas as pd
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
from test import predict_hoax, evaluate_model_performance
from load_model import load_model
from styles import COMMON_CSS
from google.cloud import storage
from io import StringIO
import os
from datetime import datetime
import pytz
# Materialize the GCP service-account key from Streamlit secrets onto disk so
# that google.cloud.storage can authenticate through the standard
# GOOGLE_APPLICATION_CREDENTIALS environment variable.
_CREDENTIALS_PATH = "credentials.json"
with open(_CREDENTIALS_PATH, "w") as cred_file:
    cred_file.write(st.secrets["GOOGLE_APPLICATION_CREDENTIALS"])
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _CREDENTIALS_PATH
def save_corrections_to_gcs(bucket_name, file_name, correction_data):
    """Append user-corrected rows to the corrections CSV stored in GCS.

    Downloads the existing corrections file (if any), concatenates the new
    rows onto it, and re-uploads the merged CSV.

    Args:
        bucket_name: Currently IGNORED — the target bucket is hard-coded
            below. TODO: honor this parameter once callers stop passing the
            "your-bucket-name" placeholder.
        file_name: Currently IGNORED — the blob name is hard-coded below.
            TODO: honor this parameter (callers pass a placeholder today).
        correction_data: List of row dicts, as produced by
            ``DataFrame.to_dict(orient='records')``.
    """
    client = storage.Client()
    # Deliberately hard-coded: the only caller passes placeholder values, so
    # using the parameters here would redirect writes to a nonexistent bucket.
    bucket = client.bucket("dashboardhoax-bucket")
    blob = bucket.blob("koreksi_pengguna_file.csv")

    # Read-modify-write cycle: not safe under concurrent writers, but
    # acceptable for a single-user dashboard.
    if blob.exists():
        # download_as_text() replaces the deprecated
        # download_as_string().decode('utf-8') pattern.
        existing_df = pd.read_csv(StringIO(blob.download_as_text()))
    else:
        # First write: start from an empty frame with the canonical schema.
        existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])

    new_data_df = pd.DataFrame(correction_data)
    updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
    blob.upload_from_string(updated_df.to_csv(index=False), content_type='text/csv')
def load_data(file):
    """Parse an uploaded CSV file-like object into a pandas DataFrame."""
    frame = pd.read_csv(file)
    return frame
def show_deteksi_upload():
    """Render the CSV-upload hoax-detection page.

    Flow: pick a model, upload a CSV, run per-row detection, show results in
    an editable grid where the user can flag wrong predictions via the
    'Correction' checkbox column, preview the flagged rows, and save them to
    Google Cloud Storage.
    """
    st.markdown(COMMON_CSS, unsafe_allow_html=True)

    # Model picker; the label is rendered separately as styled HTML, hence "".
    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
    selected_model = st.selectbox(
        "",
        [
            "cahya/bert-base-indonesian-522M",
            "indobenchmark/indobert-base-p2",
            "indolem/indobert-base-uncased",
            "mdhugol/indonesia-bert-sentiment-classification"
        ],
        key="model_selector_upload"
    )
    tokenizer, model = load_model(selected_model)

    st.markdown("<h6 style='font-size: 14px; margin-bottom: -200px;'>Unggah File Disini</h6>", unsafe_allow_html=True)
    uploaded_file = st.file_uploader("", type="csv")

    # The uploaded/detected DataFrame must survive Streamlit reruns, so it
    # lives in session state.
    if 'df' not in st.session_state:
        st.session_state.df = None

    if uploaded_file is not None:
        df = load_data(uploaded_file)
        df.index = df.index + 1  # 1-based row numbering for display

        st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Diunggah</h6>", unsafe_allow_html=True)
        grid_options = GridOptionsBuilder.from_dataframe(df)
        grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
        grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
        gridOptions = grid_options.build()
        # Read-only preview of the raw upload.
        AgGrid(
            df,
            gridOptions=gridOptions,
            update_mode=GridUpdateMode.VALUE_CHANGED,
            # NOTE(review): use_container_width is a Streamlit idiom, not a
            # documented st_aggrid kwarg — confirm it has any effect here.
            use_container_width=True
        )

        if st.button("Deteksi", key="detect_upload"):
            try:
                # Row-wise prediction; predict_hoax presumably resolves the
                # selected model internally — tokenizer/model are not passed
                # here. TODO confirm against test.predict_hoax.
                df['Result_Detection'] = df.apply(lambda row: predict_hoax(row['Title'], row['Content']), axis=1)
                # Checkbox column the user toggles to flag wrong predictions.
                df['Correction'] = False
                st.session_state.df = df.copy()
            except Exception as e:
                st.error(f"Terjadi kesalahan saat deteksi: {e}")

        if st.session_state.df is not None:
            # Show aggregate metrics only when ground-truth labels exist.
            if 'Label' in st.session_state.df.columns:
                accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
                performance_text = (
                    f"*Performansi Model*\n\n"
                    f"*Accuracy:* {round(accuracy, 2)} "
                    f"*Precision:* {round(precision, 2)} "
                    f"*Recall:* {round(recall, 2)} "
                    f"*F1 Score:* {round(f1, 2)}"
                )
                st.success(performance_text)

            st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
            # Put the action columns first and drop Label_id from the ordering.
            cols = ['Correction', 'Result_Detection'] + [col for col in st.session_state.df.columns if col not in ['Correction', 'Result_Detection', 'Label_id']]
            df_reordered = st.session_state.df[cols]
            grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
            grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
            grid_options.configure_default_column(editable=True, groupable=True)
            grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
            gridOptions = grid_options.build()
            # NOTE(review): the grid is fed st.session_state.df, not
            # df_reordered, so the column reordering above is never shown.
            grid_response = AgGrid(
                st.session_state.df,
                gridOptions=gridOptions,
                update_mode=GridUpdateMode.VALUE_CHANGED
            )

            if grid_response['data'] is not None:
                edited_df = pd.DataFrame(grid_response['data'])
                st.session_state.df = edited_df.copy()
                # NOTE(review): corrected_df is sliced BEFORE Result_Correction
                # is computed below, so on the first pass the preview may lack
                # that column.
                corrected_df = edited_df[edited_df['Correction']].copy()
                # Flip the detected label for every row the user flagged;
                # unflagged rows keep their detection result.
                edited_df['Result_Correction'] = edited_df.apply(lambda row:
                    'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
                    ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
                    axis=1
                )
                st.session_state.df = edited_df.copy()

                if not corrected_df.empty:
                    expected_cols = ['Timestamp', 'Result_Detection', 'Result_Correction', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource']
                    # NOTE(review): existing_cols is computed before Timestamp
                    # is assigned below, so the timestamp never appears in the
                    # preview table.
                    existing_cols = [col for col in expected_cols if col in corrected_df.columns]
                    # Add Timestamp only for storage (WIB time zone).
                    wib = pytz.timezone('Asia/Jakarta')
                    corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
                    corrected_df_to_display = corrected_df[existing_cols]
                    st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
                    st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
                else:
                    st.write("Tidak ada data yang dikoreksi.")

            if st.button("Simpan", key="corrected_data"):
                if 'df' in st.session_state:
                    # Re-derive the flagged rows from session state at save time.
                    corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
                    wib = pytz.timezone('Asia/Jakarta')
                    corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
                    corrected_df = corrected_df.drop(columns=['Correction'])
                    if not corrected_df.empty:
                        # NOTE(review): placeholder values — save_corrections_to_gcs
                        # currently ignores both and writes to its hard-coded
                        # bucket/blob instead.
                        bucket_name = "your-bucket-name"
                        file_name = "corrected_upload_data.csv"
                        correction_data = corrected_df.to_dict(orient='records')
                        save_corrections_to_gcs(bucket_name, file_name, correction_data)
                        st.success("Data telah disimpan.")
                        st.session_state.corrected_df = corrected_df
                    else:
                        st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
                else:
                    st.warning("Data deteksi tidak ditemukan.")