Spaces:
Sleeping
Sleeping
File size: 7,490 Bytes
b708f61 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
import streamlit as st
import pandas as pd
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
from test import predict_hoax, evaluate_model_performance
from load_model import load_model
from styles import COMMON_CSS
from google.cloud import storage
from io import StringIO
import os
from datetime import datetime
# Set environment variable for Google Cloud credentials
# NOTE(review): hardcoded absolute path to a service-account key on one
# developer's machine (D: drive) — this breaks on any other host and risks
# credential leakage if the key file is ever committed. Prefer configuring
# GOOGLE_APPLICATION_CREDENTIALS outside the source code.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:/DashboardHoax/inbound-source-431806-g7-e49e388ce0be.json"
def save_corrections_to_gcs(bucket_name, file_name, correction_data):
    """Append user-submitted correction records to a CSV object in GCS.

    Reads the existing CSV (if any), appends ``correction_data``, and
    re-uploads the combined file.

    Parameters
    ----------
    bucket_name : str
        Target bucket name. NOTE(review): currently IGNORED — the bucket is
        hardcoded below. Existing callers pass a placeholder
        ("your-bucket-name"), so honoring this argument would break the real
        upload target; fix the caller first, then use the parameters here.
    file_name : str
        Target object name. NOTE(review): ignored for the same reason.
    correction_data : list[dict]
        Rows to append; keys should match the column schema below.
    """
    client = storage.Client()
    # Hardcoded destination deliberately preserved (see NOTE above).
    bucket = client.bucket("dashboardhoax-bucket")
    blob = bucket.blob("koreksi_pengguna_file.csv")

    if blob.exists():
        # Pull current contents so new rows are appended, not overwritten.
        # download_as_text() replaces download_as_string().decode('utf-8').
        existing_df = pd.read_csv(StringIO(blob.download_as_text()))
    else:
        # First write: start from an empty frame with the expected schema.
        existing_df = pd.DataFrame(columns=[
            'Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact',
            'References', 'Classification', 'Datasource',
            'Result_Detection', 'Result_Correction',
        ])

    new_data_df = pd.DataFrame(correction_data)
    updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)

    # Serialize back to CSV text and replace the object in one upload.
    blob.upload_from_string(updated_df.to_csv(index=False),
                            content_type='text/csv')
def load_data(file):
    """Parse an uploaded CSV file-like object into a pandas DataFrame."""
    dataframe = pd.read_csv(file)
    return dataframe
def show_deteksi_upload():
    """Render the CSV-upload hoax-detection page.

    Flow: pick a model -> upload a CSV -> run detection over every row ->
    display an editable results grid where users can tick a ``Correction``
    checkbox to flip a row's verdict -> persist corrected rows to GCS.

    Relies on ``st.session_state.df`` to keep detection results across
    Streamlit reruns. (Indentation reconstructed from a flattened source —
    nesting of the display/save sections follows the apparent intent.)
    """
    st.markdown(COMMON_CSS, unsafe_allow_html=True)

    # --- Model selection ---
    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
    selected_model = st.selectbox(
        "",
        [
            "cahya/bert-base-indonesian-522M",
            "indobenchmark/indobert-base-p2",
            "indolem/indobert-base-uncased",
            "mdhugol/indonesia-bert-sentiment-classification"
        ],
        key="model_selector_upload"
    )
    tokenizer, model = load_model(selected_model)

    # --- File upload ---
    st.markdown("<h6 style='font-size: 14px; margin-bottom: -200px;'>Unggah File Disini</h6>", unsafe_allow_html=True)
    uploaded_file = st.file_uploader("", type="csv")

    if 'df' not in st.session_state:
        st.session_state.df = None

    if uploaded_file is not None:
        df = load_data(uploaded_file)
        df.index = df.index + 1  # 1-based row numbers for display

        st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Diunggah</h6>", unsafe_allow_html=True)
        grid_options = GridOptionsBuilder.from_dataframe(df)
        grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
        gridOptions = grid_options.build()
        AgGrid(
            df,
            gridOptions=gridOptions,
            update_mode=GridUpdateMode.VALUE_CHANGED,
            use_container_width=True
        )

        # --- Detection ---
        if st.button("Deteksi", key="detect_upload"):
            try:
                df['Result_Detection'] = df.apply(lambda row: predict_hoax(row['Title'], row['Content']), axis=1)
                df['Correction'] = False  # checkbox column for user overrides
                st.session_state.df = df.copy()
            except Exception as e:
                st.error(f"Terjadi kesalahan saat deteksi: {e}")

        # --- Results, metrics, and user corrections ---
        if st.session_state.df is not None:
            accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
            performance_text = (
                f"*Performansi Model*\n\n"
                f"*Accuracy:* {round(accuracy, 2)} "
                f"*Precision:* {round(precision, 2)} "
                f"*Recall:* {round(recall, 2)} "
                f"*F1 Score:* {round(f1, 2)}"
            )
            st.success(performance_text)

            st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
            # Put the editable columns first and hide Label_id from the grid.
            cols = ['Correction', 'Result_Detection'] + [col for col in st.session_state.df.columns if col not in ['Correction', 'Result_Detection', 'Label_id']]
            df_reordered = st.session_state.df[cols]

            grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
            grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
            grid_options.configure_default_column(editable=True, groupable=True)
            gridOptions = grid_options.build()
            # BUGFIX: the original passed st.session_state.df here, so the
            # reordered frame (and Label_id hiding) was never displayed.
            grid_response = AgGrid(
                df_reordered,
                gridOptions=gridOptions,
                update_mode=GridUpdateMode.VALUE_CHANGED
            )

            if grid_response['data'] is not None:
                edited_df = pd.DataFrame(grid_response['data'])
                st.session_state.df = edited_df.copy()

                # Rows the user flagged for correction.
                corrected_df = edited_df[edited_df['Correction']].copy()

                # A ticked Correction flips the detected label; otherwise the
                # detection result is kept as-is.
                edited_df['Result_Correction'] = edited_df.apply(lambda row:
                    'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
                    ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
                    axis=1
                )
                st.session_state.df = edited_df.copy()

                if not corrected_df.empty:
                    corrected_df['Result_Correction'] = corrected_df.apply(lambda row:
                        'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
                        ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
                        axis=1
                    )
                    # Add Timestamp only for saving/display, not detection.
                    corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

                    cols = ['Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction']
                    corrected_df_to_display = corrected_df[cols]

                    st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
                    st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
                else:
                    st.write("Tidak ada data yang dikoreksi.")

            # --- Persist corrections to GCS ---
            if st.button("Simpan", key="corrected_data"):
                if 'df' in st.session_state:
                    corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
                    corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    corrected_df = corrected_df.drop(columns=['Correction'])

                    if not corrected_df.empty:
                        # NOTE(review): placeholder names — save_corrections_to_gcs
                        # currently ignores them and uses its own hardcoded
                        # bucket/object; align the two before relying on these.
                        bucket_name = "your-bucket-name"
                        file_name = "corrected_upload_data.csv"

                        # GCS helper expects a list of record dicts.
                        correction_data = corrected_df.to_dict(orient='records')
                        save_corrections_to_gcs(bucket_name, file_name, correction_data)
                        st.success("Data telah disimpan.")
                        st.session_state.corrected_df = corrected_df
                    else:
                        st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
                else:
                    st.warning("Data deteksi tidak ditemukan.")
|