Nakhwa committed on
Commit b708f61
1 Parent(s): d4ac1ba

Update deteksi_upload.py

Files changed (1)
  1. deteksi_upload.py +170 -170
deteksi_upload.py CHANGED
@@ -1,170 +1,170 @@
- import streamlit as st
- import pandas as pd
- from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
- from test import predict_hoax, evaluate_model_performance
- from load_model import load_model
- from styles import COMMON_CSS
- from google.cloud import storage
- from io import StringIO
- import os
- from datetime import datetime
-
- # Set environment variable for Google Cloud credentials
- os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:/DashboardHoax/inbound-source-431806-g7-e49e388ce0be.json"
-
- def save_corrections_to_gcs(bucket_name, file_name, correction_data):
-     client = storage.Client()
-     bucket = client.bucket("dashboardhoax-bucket")
-     blob = bucket.blob("koreksi_pengguna_file.csv")
-
-     # Check if the blob (file) exists
-     if blob.exists():
-         # Download existing CSV from GCS
-         existing_data = blob.download_as_string().decode('utf-8')
-         existing_df = pd.read_csv(StringIO(existing_data))
-     else:
-         # Create a new DataFrame if the file does not exist
-         existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])
-
-     # Append the new data to the existing data
-     new_data_df = pd.DataFrame(correction_data)
-     updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
-
-     # Convert the DataFrame back to CSV and upload
-     updated_csv_data = updated_df.to_csv(index=False)
-     blob.upload_from_string(updated_csv_data, content_type='text/csv')
-
- def load_data(file):
-     return pd.read_csv(file)
-
- def show_deteksi_uploadgcs():
-     st.markdown(COMMON_CSS, unsafe_allow_html=True)
-
-     st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
-     selected_model = st.selectbox(
-         "",
-         [
-             "cahya/bert-base-indonesian-522M",
-             "indobenchmark/indobert-base-p2",
-             "indolem/indobert-base-uncased",
-             "mdhugol/indonesia-bert-sentiment-classification"
-         ],
-         key="model_selector_upload"
-     )
-
-     tokenizer, model = load_model(selected_model)
-
-     st.markdown("<h6 style='font-size: 14px; margin-bottom: -200px;'>Unggah File Disini</h6>", unsafe_allow_html=True)
-     uploaded_file = st.file_uploader("", type="csv")
-
-     if 'df' not in st.session_state:
-         st.session_state.df = None
-
-     if uploaded_file is not None:
-         df = load_data(uploaded_file)
-         df.index = df.index + 1
-
-         st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Diunggah</h6>", unsafe_allow_html=True)
-
-         grid_options = GridOptionsBuilder.from_dataframe(df)
-         grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
-         gridOptions = grid_options.build()
-
-         AgGrid(
-             df,
-             gridOptions=gridOptions,
-             update_mode=GridUpdateMode.VALUE_CHANGED,
-             use_container_width=True
-         )
-
-         if st.button("Deteksi", key="detect_upload"):
-             try:
-                 df['Result_Detection'] = df.apply(lambda row: predict_hoax(row['Title'], row['Content']), axis=1)
-                 df['Correction'] = False
-                 st.session_state.df = df.copy()
-             except Exception as e:
-                 st.error(f"Terjadi kesalahan saat deteksi: {e}")
-
-         if st.session_state.df is not None:
-
-             accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
-             performance_text = (
-                 f"*Performansi Model*\n\n"
-                 f"*Accuracy:* {round(accuracy, 2)}&nbsp;&nbsp;"
-                 f"*Precision:* {round(precision, 2)}&nbsp;&nbsp;"
-                 f"*Recall:* {round(recall, 2)}&nbsp;&nbsp;"
-                 f"*F1 Score:* {round(f1, 2)}"
-             )
-
-             st.success(performance_text)
-
-             st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
-
-             cols = ['Correction', 'Result_Detection'] + [col for col in st.session_state.df.columns if col not in ['Correction', 'Result_Detection', 'Label_id']]
-             df_reordered = st.session_state.df[cols]
-
-             grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
-             grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
-             grid_options.configure_default_column(editable=True, groupable=True)
-             gridOptions = grid_options.build()
-
-             grid_response = AgGrid(
-                 st.session_state.df,
-                 gridOptions=gridOptions,
-                 update_mode=GridUpdateMode.VALUE_CHANGED
-             )
-
-             if grid_response['data'] is not None:
-                 edited_df = pd.DataFrame(grid_response['data'])
-                 st.session_state.df = edited_df.copy()
-                 corrected_df = edited_df[edited_df['Correction']].copy()
-
-                 edited_df['Result_Correction'] = edited_df.apply(lambda row:
-                     'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
-                     ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
-                     axis=1
-                 )
-
-                 st.session_state.df = edited_df.copy()
-
-                 if not corrected_df.empty:
-                     corrected_df['Result_Correction'] = corrected_df.apply(lambda row:
-                         'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
-                         ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
-                         axis=1
-                     )
-
-                     # Add Timestamp only for saving
-                     corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-
-                     cols = ['Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction']
-                     corrected_df_to_display = corrected_df[cols]
-
-                     st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
-                     st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
-                 else:
-                     st.write("Tidak ada data yang dikoreksi.")
-
-             if st.button("Simpan", key="corrected_data"):
-                 if 'df' in st.session_state:
-                     corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
-                     corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-                     corrected_df = corrected_df.drop(columns=['Correction'])
-
-                     if not corrected_df.empty:
-                         # Define GCS bucket and file name
-                         bucket_name = "your-bucket-name"
-                         file_name = "corrected_upload_data.csv"
-
-                         # Convert DataFrame to list of dicts for GCS
-                         correction_data = corrected_df.to_dict(orient='records')
-
-                         # Save corrected data to GCS
-                         save_corrections_to_gcs(bucket_name, file_name, correction_data)
-
-                         st.success("Data telah disimpan.")
-                         st.session_state.corrected_df = corrected_df
-                     else:
-                         st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
-                 else:
-                     st.warning("Data deteksi tidak ditemukan.")
 
+ import streamlit as st
+ import pandas as pd
+ from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
+ from test import predict_hoax, evaluate_model_performance
+ from load_model import load_model
+ from styles import COMMON_CSS
+ from google.cloud import storage
+ from io import StringIO
+ import os
+ from datetime import datetime
+
+ # Set environment variable for Google Cloud credentials
+ os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:/DashboardHoax/inbound-source-431806-g7-e49e388ce0be.json"
+
+ def save_corrections_to_gcs(bucket_name, file_name, correction_data):
+     client = storage.Client()
+     bucket = client.bucket("dashboardhoax-bucket")
+     blob = bucket.blob("koreksi_pengguna_file.csv")
+
+     # Check if the blob (file) exists
+     if blob.exists():
+         # Download existing CSV from GCS
+         existing_data = blob.download_as_string().decode('utf-8')
+         existing_df = pd.read_csv(StringIO(existing_data))
+     else:
+         # Create a new DataFrame if the file does not exist
+         existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])
+
+     # Append the new data to the existing data
+     new_data_df = pd.DataFrame(correction_data)
+     updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
+
+     # Convert the DataFrame back to CSV and upload
+     updated_csv_data = updated_df.to_csv(index=False)
+     blob.upload_from_string(updated_csv_data, content_type='text/csv')
+
+ def load_data(file):
+     return pd.read_csv(file)
+
+ def show_deteksi_upload():
+     st.markdown(COMMON_CSS, unsafe_allow_html=True)
+
+     st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
+     selected_model = st.selectbox(
+         "",
+         [
+             "cahya/bert-base-indonesian-522M",
+             "indobenchmark/indobert-base-p2",
+             "indolem/indobert-base-uncased",
+             "mdhugol/indonesia-bert-sentiment-classification"
+         ],
+         key="model_selector_upload"
+     )
+
+     tokenizer, model = load_model(selected_model)
+
+     st.markdown("<h6 style='font-size: 14px; margin-bottom: -200px;'>Unggah File Disini</h6>", unsafe_allow_html=True)
+     uploaded_file = st.file_uploader("", type="csv")
+
+     if 'df' not in st.session_state:
+         st.session_state.df = None
+
+     if uploaded_file is not None:
+         df = load_data(uploaded_file)
+         df.index = df.index + 1
+
+         st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Diunggah</h6>", unsafe_allow_html=True)
+
+         grid_options = GridOptionsBuilder.from_dataframe(df)
+         grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
+         gridOptions = grid_options.build()
+
+         AgGrid(
+             df,
+             gridOptions=gridOptions,
+             update_mode=GridUpdateMode.VALUE_CHANGED,
+             use_container_width=True
+         )
+
+         if st.button("Deteksi", key="detect_upload"):
+             try:
+                 df['Result_Detection'] = df.apply(lambda row: predict_hoax(row['Title'], row['Content']), axis=1)
+                 df['Correction'] = False
+                 st.session_state.df = df.copy()
+             except Exception as e:
+                 st.error(f"Terjadi kesalahan saat deteksi: {e}")
+
+         if st.session_state.df is not None:
+
+             accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
+             performance_text = (
+                 f"*Performansi Model*\n\n"
+                 f"*Accuracy:* {round(accuracy, 2)}&nbsp;&nbsp;"
+                 f"*Precision:* {round(precision, 2)}&nbsp;&nbsp;"
+                 f"*Recall:* {round(recall, 2)}&nbsp;&nbsp;"
+                 f"*F1 Score:* {round(f1, 2)}"
+             )
+
+             st.success(performance_text)
+
+             st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
+
+             cols = ['Correction', 'Result_Detection'] + [col for col in st.session_state.df.columns if col not in ['Correction', 'Result_Detection', 'Label_id']]
+             df_reordered = st.session_state.df[cols]
+
+             grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
+             grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
+             grid_options.configure_default_column(editable=True, groupable=True)
+             gridOptions = grid_options.build()
+
+             grid_response = AgGrid(
+                 st.session_state.df,
+                 gridOptions=gridOptions,
+                 update_mode=GridUpdateMode.VALUE_CHANGED
+             )
+
+             if grid_response['data'] is not None:
+                 edited_df = pd.DataFrame(grid_response['data'])
+                 st.session_state.df = edited_df.copy()
+                 corrected_df = edited_df[edited_df['Correction']].copy()
+
+                 edited_df['Result_Correction'] = edited_df.apply(lambda row:
+                     'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
+                     ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
+                     axis=1
+                 )
+
+                 st.session_state.df = edited_df.copy()
+
+                 if not corrected_df.empty:
+                     corrected_df['Result_Correction'] = corrected_df.apply(lambda row:
+                         'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
+                         ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
+                         axis=1
+                     )
+
+                     # Add Timestamp only for saving
+                     corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+                     cols = ['Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction']
+                     corrected_df_to_display = corrected_df[cols]
+
+                     st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
+                     st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
+                 else:
+                     st.write("Tidak ada data yang dikoreksi.")
+
+             if st.button("Simpan", key="corrected_data"):
+                 if 'df' in st.session_state:
+                     corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
+                     corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+                     corrected_df = corrected_df.drop(columns=['Correction'])
+
+                     if not corrected_df.empty:
+                         # Define GCS bucket and file name
+                         bucket_name = "your-bucket-name"
+                         file_name = "corrected_upload_data.csv"
+
+                         # Convert DataFrame to list of dicts for GCS
+                         correction_data = corrected_df.to_dict(orient='records')
+
+                         # Save corrected data to GCS
+                         save_corrections_to_gcs(bucket_name, file_name, correction_data)
+
+                         st.success("Data telah disimpan.")
+                         st.session_state.corrected_df = corrected_df
+                     else:
+                         st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
+                 else:
+                     st.warning("Data deteksi tidak ditemukan.")
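The functional change in this diff is the rename of show_deteksi_uploadgcs to show_deteksi_upload, so any module that imports the old name needs a matching update. A minimal sketch of such a caller is below; the app.py module name and the page setup line are assumptions for illustration and are not part of this commit.

# Hypothetical caller (e.g., app.py) updated to the renamed entry point.
import streamlit as st
from deteksi_upload import show_deteksi_upload  # previously: show_deteksi_uploadgcs

st.set_page_config(page_title="DashboardHoax")  # assumed page setup, not taken from the diff
show_deteksi_upload()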