Spaces:

nlp-brin-id
/

deteksihoax

Running

App Files Files Community

deteksihoax / home.py

Nakhwa

Update home.py

0bb298c verified 6 months ago

raw

history blame contribute delete

16.4 kB

	import streamlit as st
	import pandas as pd
	import plotly.express as px
	from wordcloud import WordCloud, STOPWORDS
	import matplotlib.pyplot as plt

	# Dataset loader; the decorator lets Streamlit cache the result between reruns
	@st.cache_data
	def load_data():
	    """Read ``mafindo_mix_llm.csv`` from the working directory.

	    Returns:
	        The CSV contents as a ``pandas.DataFrame``.
	    """
	    return pd.read_csv("mafindo_mix_llm.csv")

	# WordCloud builder; the decorator lets Streamlit cache the generated object
	@st.cache_resource
	def generate_wordcloud(text, colormap, stopwords):
	    """Build a 500x200 white-background word cloud from ``text``.

	    Args:
	        text: The text whose words are drawn.
	        colormap: Colormap name forwarded to ``WordCloud``.
	        stopwords: Words forwarded to ``WordCloud`` as its stopword set.

	    Returns:
	        The value returned by ``WordCloud.generate``.
	    """
	    cloud_options = dict(
	        width=500,
	        height=200,
	        background_color='white',
	        colormap=colormap,
	        stopwords=stopwords,
	    )
	    renderer = WordCloud(**cloud_options)
	    return renderer.generate(text)

	# Last year shown on the per-year charts; rows with a later year are dropped.
	_MAX_YEAR = 2023

	# Bar colours shared by the charts that are split by the 'Label' column.
	_LABEL_COLORS = {'HOAX': 'red', 'NON-HOAX': 'green'}

	# Extra words merged into the default WordCloud stopword list.
	_EXTRA_STOPWORDS = frozenset({
	    "dan", "di", "yang", "ke", "dari", "untuk", "pada", "adalah", "sebuah",
	    "dengan", "tersebut", "ini", "itu", "atau", "dalam", "juga", "yg", "tapi",
	})

	# Rows of the evaluation table: model name, NON-HOAX precision/recall/F1,
	# HOAX precision/recall/F1, accuracy (column order follows the table header).
	_MODEL_METRICS = (
	    ("indobenchmark/indobert-base-p2", 0.6898, 0.9793, 0.8094, 0.8400, 0.1981, 0.3206, 0.7023),
	    ("cahya/bert-base-indonesian-522M", 0.7545, 0.8756, 0.8106, 0.6800, 0.4811, 0.5635, 0.7358),
	    ("indolem/indobert-base-uncased", 0.7536, 0.8238, 0.7871, 0.6136, 0.5094, 0.5567, 0.7124),
	    ("mdhugol/indonesia-bert-sentiment-classification", 0.7444, 0.8601, 0.7981, 0.6447, 0.4623, 0.5385, 0.7191),
	)

	# Rows of the data-statistics table: label, then (Train, Test, Dev) counts.
	_SPLIT_COUNTS = (
	    ("HOAX", (11563, 193, 193)),
	    ("NON-HOAX", (789, 106, 106)),
	)


	def _section_heading(title):
	    """Write ``title`` as the small <h6> heading used above every visual."""
	    st.markdown(
	        f"<h6 style='font-size: 14px; margin-bottom: 0;'>{title}</h6>",
	        unsafe_allow_html=True,
	    )


	def _count_values(series, key_column, count_column):
	    """Return ``series.value_counts()`` as a two-column DataFrame."""
	    counts = series.value_counts().reset_index()
	    # Assign both names explicitly instead of relying on the names that
	    # reset_index() happens to produce.
	    counts.columns = [key_column, count_column]
	    return counts


	def _axis_layout(x_title, y_title, x_tick_size=8):
	    """Return the axis title/font layout arguments shared by the bar and line charts."""
	    return dict(
	        xaxis_title=x_title, yaxis_title=y_title,
	        xaxis_title_font_size=10, yaxis_title_font_size=10,
	        xaxis_tickfont_size=x_tick_size, yaxis_tickfont_size=8,
	    )


	def _side_legend(title, x, toggle_others=True):
	    """Return layout settings for a vertical, borderless legend right of the plot.

	    When ``toggle_others`` is true the legend also gets
	    ``itemclick='toggleothers'`` and ``itemsizing='constant'``.
	    """
	    legend = dict(
	        title_text=title,
	        font=dict(size=8),
	        traceorder='normal',
	        orientation='v',
	        yanchor='top', y=1, xanchor='left', x=x,
	        bgcolor='rgba(255, 255, 255, 0)',   # fully transparent background
	        bordercolor='rgba(0, 0, 0, 0)',     # fully transparent border
	    )
	    if toggle_others:
	        legend.update(itemclick='toggleothers', itemsizing='constant')
	    return legend


	def _render_label_chart(df):
	    """Draw the bar chart of row counts per 'Label' value."""
	    _section_heading("Hoax vs Non-Hoax")
	    label_counts = _count_values(df['Label'], 'Label', 'Jumlah')
	    chart = px.bar(label_counts, x='Label', y='Jumlah', color='Label',
	                   color_discrete_map=_LABEL_COLORS)
	    chart.update_layout(
	        width=200, height=150,
	        margin=dict(t=10, b=10, l=10, r=10),
	        showlegend=False,
	        **_axis_layout('Label', 'Jumlah'),
	    )
	    st.plotly_chart(chart, use_container_width=False)


	def _render_datasource_chart(df):
	    """Draw the grouped bar chart of row counts per 'Datasource' and 'Label'."""
	    _section_heading("Hoax vs Non-Hoax per Data Source")
	    grouped = df.groupby(['Datasource', 'Label']).size().reset_index(name='counts')
	    chart = px.bar(grouped, x='Datasource', y='counts', color='Label', barmode='group',
	                   color_discrete_map=_LABEL_COLORS)
	    chart.update_layout(
	        width=500, height=150,
	        xaxis_tickangle=0,
	        margin=dict(t=10, b=10, l=10, r=50),
	        legend=_side_legend('Label', 1.05, toggle_others=False),
	        showlegend=True,
	        **_axis_layout('Datasource', 'Jumlah', x_tick_size=6),
	    )
	    st.plotly_chart(chart, use_container_width=False)


	def _render_hoax_per_year_chart(df_recent):
	    """Draw the line chart of 'HOAX' row counts per 'Year'."""
	    _section_heading("Hoax per Tahun")
	    hoax_rows = df_recent[df_recent['Label'] == 'HOAX']
	    per_year = hoax_rows.groupby('Year').size().reset_index(name='count')
	    chart = px.line(per_year, x='Year', y='count', line_shape='linear',
	                    color_discrete_sequence=['red'])
	    chart.update_layout(
	        width=200, height=150,
	        margin=dict(t=10, b=10, l=10, r=10),
	        showlegend=False,
	        **_axis_layout('Tahun', 'Jumlah Hoax'),
	    )
	    st.plotly_chart(chart, use_container_width=False)


	def _render_topics_per_year_chart(df_recent):
	    """Draw the bar chart of row counts per 'Year', coloured by 'Topic'."""
	    _section_heading("Topik per Tahun")
	    per_year = df_recent.groupby(['Year', 'Topic']).size().reset_index(name='count')
	    # NOTE(review): color_continuous_scale presumably has no effect if 'Topic'
	    # is a text column; kept unchanged because the dtype is not visible here.
	    chart = px.bar(per_year, x='Year', y='count', color='Topic',
	                   color_continuous_scale=px.colors.sequential.Viridis)
	    chart.update_layout(
	        width=600, height=150,
	        margin=dict(t=10, b=10, l=10, r=10),
	        showlegend=True,
	        legend=_side_legend('Topic', 1.02),
	        **_axis_layout('Tahun', 'Jumlah Topik'),
	    )
	    st.plotly_chart(chart, use_container_width=True)


	def _render_hoax_wordcloud(df, stopwords):
	    """Draw a word cloud of the 'Content' text of all 'HOAX' rows."""
	    _section_heading("Wordcloud Hoax")
	    hoax_text = ' '.join(df.loc[df['Label'] == 'HOAX', 'Content'])
	    cloud = generate_wordcloud(hoax_text, 'Reds', stopwords)
	    # Draw on an explicit figure/axes pair rather than pyplot's implicit
	    # "current figure", and close the figure afterwards so figures do not
	    # accumulate across Streamlit reruns.
	    fig, ax = plt.subplots(figsize=(5, 2.5))
	    try:
	        ax.imshow(cloud, interpolation='bilinear')
	        ax.axis('off')
	        st.pyplot(fig)
	    finally:
	        plt.close(fig)


	def _render_donut(series, heading, column, width, right_margin):
	    """Draw a donut chart of the value counts of ``series``.

	    ``column`` names the category column and the legend title;
	    ``right_margin`` is the space reserved for the legend.
	    """
	    _section_heading(heading)
	    counts = _count_values(series, column, 'Count')
	    chart = px.pie(counts, names=column, values='Count', hole=0.3,
	                   color_discrete_sequence=px.colors.qualitative.Set2)
	    chart.update_layout(
	        width=width, height=170,
	        margin=dict(t=20, b=20, l=20, r=right_margin),
	        legend=_side_legend(column, 1.07),
	    )
	    st.plotly_chart(chart, use_container_width=True)


	def _metrics_table_html(rows):
	    """Return the evaluation-metrics table as HTML, one unindented tag per line.

	    The row with the largest last value (accuracy) gets a highlighted background.
	    """
	    top_th = "border: none; padding: 5px; font-size: 14px; text-align: {}; border-top: 1px solid black;"
	    top_left = top_th.format("left")
	    top_center = top_th.format("center")
	    sub_th = "border: none; padding: 5px; font-size: 12px; width:80px; text-align: center;"
	    td = "border: none; padding: 5px; text-align: {}; font-size: 12px;"
	    td_left = td.format("left")
	    td_center = td.format("center")

	    parts = [
	        '<table style="width:100%; border-collapse: collapse; font-size: 12px; '
	        'border-top: 1px solid black; border-bottom: 1px solid black;">',
	        '<tr style="border-bottom: 1px solid black; text-align: center; border-top: 1px solid black;">',
	        f'<th rowspan="2" style="{top_left}">Pre-trained Model</th>',
	        f'<th colspan="3" style="{top_center}">NON-HOAX</th>',
	        f'<th colspan="3" style="{top_center}">HOAX</th>',
	        f'<th rowspan="2" style="{top_center}">Accuracy</th>',
	        '</tr>',
	        '<tr style="border-bottom: 1px solid black;">',
	    ]
	    # The same three sub-headers appear under NON-HOAX and under HOAX.
	    parts.extend(
	        f'<th style="{sub_th}">{metric}</th>'
	        for metric in ("Precision", "Recall", "F1-Score") * 2
	    )
	    parts.append('</tr>')

	    best = max(rows, key=lambda row: row[-1], default=None)
	    for row in rows:
	        highlight = "background-color: #FFF1EA; " if row is best else ""
	        parts.append(
	            f"<tr style='{highlight}font-size: 12px; text-align: center; "
	            f"border: 1px solid transparent;'>"
	        )
	        parts.append(f"<td style='{td_left}'>{row[0]}</td>")
	        parts.extend(f"<td style='{td_center}'>{value:.4f}</td>" for value in row[1:])
	        parts.append("</tr>")

	    # Empty trailing row whose top border closes the table.
	    parts.append("<tr style='border-top: 1px solid black;'></tr>")
	    parts.append("</table>")
	    return "\n".join(parts)


	def _format_count(value):
	    """Format an integer with '.' as the thousands separator (e.g. 11563 -> '11.563')."""
	    return f"{value:,}".replace(",", ".")


	def _split_stats_table_html(split_counts):
	    """Return the Train/Test/Dev statistics table as HTML, one row per line.

	    The TOTAL row is the column-wise sum of ``split_counts``, so it cannot
	    drift from the per-label rows.
	    """
	    th = ("padding: 8px; border: 1px solid transparent; font-weight: bold; "
	          "background-color: #d5f4e6; text-align: {};")
	    td = "padding: 8px; border: 1px solid transparent; background-color: #d5f4e6;"
	    td_center = td + " text-align: center;"

	    def body_row(tr_style, label, counts):
	        cells = "".join(
	            f'<td style="{td_center}">{_format_count(count)}</td>' for count in counts
	        )
	        return f'<tr style="{tr_style}"><td style="{td}">{label}</td>{cells}</tr>'

	    parts = [
	        '<table style="width:100%; border-collapse: collapse; font-size: 12px;">',
	        '<thead>',
	        '<tr style="border-top: 1.5px solid #B2BABB; border-bottom: 1.5px solid #B2BABB;">',
	        f'<th style="{th.format("left")}">Label</th>',
	    ]
	    parts.extend(
	        f'<th style="{th.format("center")}">{split}</th>'
	        for split in ("Train", "Test", "Dev")
	    )
	    parts.extend(['</tr>', '</thead>', '<tbody>'])

	    last_index = len(split_counts) - 1
	    for index, (label, counts) in enumerate(split_counts):
	        # Only the last per-label row gets a visible line separating it from TOTAL.
	        line_color = "black" if index == last_index else "transparent"
	        parts.append(body_row(f"border-bottom: 1px solid {line_color};", label, counts))

	    totals = [sum(column) for column in zip(*(counts for _, counts in split_counts))]
	    parts.append(body_row(
	        "font-weight: bold; border-top: 1px solid transparent; "
	        "border-bottom: 1.5px solid #B2BABB;",
	        "TOTAL", totals,
	    ))
	    parts.extend(['</tbody>', '</table>'])
	    return "\n".join(parts)


	def show_home():
	    """Render the home dashboard.

	    Loads the dataset with ``load_data()`` and draws, in order: a row of
	    four Plotly charts (label counts, counts per data source, hoaxes per
	    year, topics per year), a row with the hoax word cloud and two donut
	    charts ('Classification' and 'Tone'), and a row with the model
	    evaluation table and the data-split statistics table.

	    Raises:
	        Whatever ``pd.to_datetime`` raises if a 'Tanggal' value does not
	        match the ``%d/%m/%Y`` format.
	    """
	    df = load_data()

	    # Parse the date column once and derive the year used by the yearly charts.
	    df['Tanggal'] = pd.to_datetime(df['Tanggal'], format='%d/%m/%Y')
	    df['Year'] = df['Tanggal'].dt.year

	    # Force text so that joining the contents for the word cloud cannot fail
	    # on non-string cells.
	    df['Content'] = df['Content'].astype(str)

	    combined_stopwords = set(STOPWORDS).union(_EXTRA_STOPWORDS)
	    df_recent = df[df['Year'] <= _MAX_YEAR]

	    # Row 1: four charts.
	    col1, col2, col3, col4 = st.columns([1.5, 2.5, 1.5, 2.5])
	    with col1:
	        _render_label_chart(df)
	    with col2:
	        _render_datasource_chart(df)
	    with col3:
	        _render_hoax_per_year_chart(df_recent)
	    with col4:
	        _render_topics_per_year_chart(df_recent)

	    # Row 2: word cloud and two donut charts.
	    col5, col6, col7 = st.columns([2, 2.5, 2.5])
	    with col5:
	        _render_hoax_wordcloud(df, combined_stopwords)
	    with col6:
	        # Lower-case first so values differing only by case are counted together.
	        _render_donut(df['Classification'].str.lower(), "Klasifikasi",
	                      'Classification', width=300, right_margin=120)
	    with col7:
	        _render_donut(df['Tone'], "Tone", 'Tone', width=250, right_margin=100)

	    # Row 3: evaluation metrics and data statistics tables.
	    col8, col9 = st.columns([5, 1.5])
	    with col8:
	        _section_heading("Matriks Evaluasi")
	        st.markdown(_metrics_table_html(_MODEL_METRICS), unsafe_allow_html=True)
	    with col9:
	        _section_heading("Statistik Data")
	        st.markdown(_split_stats_table_html(_SPLIT_COUNTS), unsafe_allow_html=True)