File size: 6,865 Bytes
21ac434 7bf8be4 20f957a 93b4f33 20f957a 68a2713 93b4f33 7bf8be4 68a2713 20f957a 68a2713 7bf8be4 68a2713 7bf8be4 20f957a 7bf8be4 20f957a 7bf8be4 20f957a 7bf8be4 20f957a 7bf8be4 20f957a 7bf8be4 20f957a 7bf8be4 20f957a 7bf8be4 20f957a 7bf8be4 20f957a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
import seaborn as sns
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
# Set page configuration: use the wide layout so the two-column chart grids
# rendered further down can use the full browser width.
# NOTE(review): Streamlit documents that set_page_config should run before
# other st.* commands — keep this ahead of the sidebar widgets; confirm
# against the installed Streamlit version.
st.set_page_config(layout="wide")
# Function to load and clean data
def load_and_clean_data(data_dir="data"):
    """Load the four review CSVs, combine them and drop unlabelled rows.

    Args:
        data_dir: Directory that contains the CSV files. Defaults to
            ``"data"``, the location this script has always read from.

    Returns:
        One DataFrame with a fresh 0..n-1 index in which the domain label
        ``"MUSLIM"`` is normalised to ``"Muslim"`` and every row without a
        usable ``Domain``, ``Discrimination`` or ``Sentiment`` label has
        been removed.

    Raises:
        FileNotFoundError: If one of the CSV files is missing.
        KeyError: If a required column is absent from the combined data.
    """
    file_names = (
        "reviewed_social_media_english.csv",
        "reviewed_news_english.csv",
        "tamil_social_media.csv",
        "tamil_news.csv",
    )
    frames = [pd.read_csv(f"{data_dir}/{name}") for name in file_names]

    # Each CSV is read with its own 0-based index; ignore_index keeps the
    # combined frame free of repeated row labels.
    df_combined = pd.concat(frames, ignore_index=True)
    df_combined['Domain'] = df_combined['Domain'].replace("MUSLIM", "Muslim")

    # Labels that mark a row as unusable. pandas versions that parse the
    # literal text "None" as a missing value turn those cells into NaN, which
    # a plain `!= 'None'` comparison would let through, so missing values are
    # rejected explicitly as well.
    unwanted = {
        'Domain': ['Not relevant', 'None'],
        'Discrimination': ['None'],
        'Sentiment': ['None'],
    }
    keep = pd.Series(True, index=df_combined.index)
    for column, labels in unwanted.items():
        values = df_combined[column]
        keep &= values.notna() & ~values.isin(labels)

    return df_combined[keep].reset_index(drop=True)
# Load the combined, cleaned dataset for this script run.
df = load_and_clean_data()

# Define Sidebar Filters: one multiselect per categorical column. Every value
# observed in the data is offered and pre-selected, so the dashboard starts
# with nothing filtered out.
domain_options = df['Domain'].unique()
channel_options = df['Channel'].unique()
sentiment_options = df['Sentiment'].unique()
discrimination_options = df['Discrimination'].unique()

domain_filter = st.sidebar.multiselect(
    'Select Domain', options=domain_options, default=domain_options
)
channel_filter = st.sidebar.multiselect(
    'Select Channel', options=channel_options, default=channel_options
)
sentiment_filter = st.sidebar.multiselect(
    'Select Sentiment', options=sentiment_options, default=sentiment_options
)
discrimination_filter = st.sidebar.multiselect(
    'Select Discrimination',
    options=discrimination_options,
    default=discrimination_options,
)

# Apply filters: keep only rows whose value is selected in all four widgets.
# The explicit .copy() detaches the result from `df`; chart helpers assign
# columns on the frame they receive, which on a bare boolean-indexed slice
# triggers pandas' SettingWithCopyWarning.
df_filtered = df[
    df['Domain'].isin(domain_filter)
    & df['Channel'].isin(channel_filter)
    & df['Sentiment'].isin(sentiment_filter)
    & df['Discrimination'].isin(discrimination_filter)
].copy()

# Define a color palette for consistent visualization styles
color_palette = px.colors.sequential.Viridis

# Page navigation
page = st.sidebar.selectbox(
    "Choose a page",
    ["Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"],
)
# Donut chart helper (used for the domain distribution)
def create_pie_chart(df, column, title):
    """Build a donut chart showing how often each value of ``column`` occurs.

    Args:
        df: DataFrame holding the data to plot.
        column: Name of the column whose values become the slices.
        title: Chart title.

    Returns:
        The Plotly figure, with slice colours taken from the module-level
        ``color_palette``.
    """
    donut = px.pie(df, names=column, title=title, hole=0.35)
    donut.update_traces(marker={"colors": color_palette})
    donut.update_layout(
        margin={"l": 20, "r": 20, "t": 30, "b": 20},
        legend={"x": 0.1, "y": 1},
        font={"size": 12},
    )
    return donut
# Visualization for Distribution of Gender versus Ethnicity
def create_gender_ethnicity_distribution_chart(df):
    """Build a donut chart splitting rows into gender vs. ethnicity domains.

    Rows whose ``Domain`` is ``"Women"`` or ``"LGBTQIA+"`` are labelled
    ``"Gender: Women & LGBTQIA+"``; every other row is labelled
    ``"Ethnicity"``.

    Args:
        df: DataFrame with a ``Domain`` column. It is not modified.

    Returns:
        The Plotly figure.
    """
    is_gender = df['Domain'].isin(["Women", "LGBTQIA+"])
    group_labels = is_gender.map(
        {True: "Gender: Women & LGBTQIA+", False: "Ethnicity"}
    )
    # assign() returns a new frame, so the helper column never leaks into the
    # caller's DataFrame.
    labelled = df.assign(GenderOrEthnicity=group_labels)

    fig = px.pie(
        labelled,
        names='GenderOrEthnicity',
        title='Distribution of Gender versus Ethnicity',
        hole=0.35,
    )
    fig.update_layout(
        margin=dict(l=20, r=20, t=30, b=20),
        legend=dict(x=0.1, y=1),
        font=dict(size=12),
    )
    return fig
# Visualization for Sentiment Distribution Across Domains
def create_sentiment_distribution_chart(df):
    """Build a stacked bar chart of sentiment counts per domain.

    Side effect: the ``Discrimination`` column of ``df`` is overwritten in
    place with ``"Non Discriminative"`` rewritten as ``"Non-Discriminative"``,
    so the caller's frame carries the normalised label afterwards.

    Args:
        df: DataFrame with ``Domain``, ``Sentiment`` and ``Discrimination``
            columns.

    Returns:
        The Plotly figure.
    """
    # Assuming typo in the original script
    label_fix = {"Non Discriminative": "Non-Discriminative"}
    df['Discrimination'] = df['Discrimination'].replace(label_fix)

    pair_sizes = df.groupby(['Domain', 'Sentiment']).size()
    domain_counts = pair_sizes.reset_index(name='counts')

    chart = px.bar(
        domain_counts,
        x='Domain',
        y='counts',
        color='Sentiment',
        title="Sentiment Distribution Across Domains",
        barmode='stack',
    )
    chart.update_layout(
        margin={"l": 20, "r": 20, "t": 40, "b": 20},
        xaxis_title="Domain",
        yaxis_title="Counts",
        font={"size": 12},
    )
    return chart
# Visualization for Correlation between Sentiment and Discrimination
def create_sentiment_discrimination_grouped_chart(df):
    """Build a grouped bar chart of discrimination labels per sentiment.

    Args:
        df: DataFrame with ``Sentiment`` and ``Discrimination`` columns.

    Returns:
        The Plotly figure, with one bar group per sentiment and one bar per
        discrimination label present in ``df``.
    """
    crosstab_df = pd.crosstab(df['Sentiment'], df['Discrimination']).reset_index()

    # Melt whichever labels actually occur instead of a fixed ['Yes', 'No']:
    # pd.melt raises KeyError for a missing column, which happens as soon as a
    # label is filtered out in the sidebar or the data uses other label names.
    # 'Yes'/'No' keep their previous leading position when they are present.
    present = [label for label in crosstab_df.columns if label != 'Sentiment']
    preferred = ['Yes', 'No']
    value_vars = [label for label in preferred if label in present]
    value_vars += [label for label in present if label not in preferred]

    melted_df = pd.melt(
        crosstab_df,
        id_vars='Sentiment',
        value_vars=value_vars,
        var_name='Discrimination',
        value_name='Count',
    )
    fig = px.bar(
        melted_df,
        x='Sentiment',
        y='Count',
        color='Discrimination',
        barmode='group',
        title="Sentiment vs. Discrimination",
    )
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis_title="Sentiment",
        yaxis_title="Count",
        font=dict(size=12),
    )
    return fig
# Function for Channel-wise Sentiment Over Time Chart
def create_channel_sentiment_over_time_chart(df):
    """Build a line chart of monthly post counts per channel and sentiment.

    Each channel gets its own colour and each sentiment its own dash style.
    Within a (month, channel) pair that has data, sentiments with no posts
    are plotted as 0.

    Args:
        df: DataFrame with ``Date``, ``Channel`` and ``Sentiment`` columns.
            It is not modified.

    Returns:
        The Plotly figure.
    """
    # Bucket every row into the first day of its month. The parsed dates stay
    # in a local Series so the caller's 'Date' column is left untouched.
    month_start = pd.to_datetime(df['Date']).dt.to_period('M').dt.to_timestamp()
    sizes = (
        df.assign(Date=month_start)
        .groupby(['Date', 'Channel', 'Sentiment'])
        .size()
    )

    if sizes.empty:
        # Nothing to pivot; still hand Plotly a frame with the expected columns.
        counts = sizes.reset_index(name='Count')
    else:
        # Pivot sentiments out to fill missing combinations with 0, then go
        # back to long form so Plotly can address every field as a column.
        counts = (
            sizes.unstack(fill_value=0)
            .reset_index()
            .melt(
                id_vars=['Date', 'Channel'],
                var_name='Sentiment',
                value_name='Count',
            )
        )

    fig = px.line(
        counts, x='Date', y='Count', color='Channel', line_dash='Sentiment'
    )
    fig.update_layout(
        title='Channel-wise Sentiment Over Time',
        margin=dict(l=20, r=20, t=40, b=20),
    )
    return fig
# Function for Channel-wise Distribution of Discriminative Content Chart
def create_channel_discrimination_chart(df):
    """Build a grouped bar chart of discrimination labels per channel.

    Args:
        df: DataFrame with ``Channel`` and ``Discrimination`` columns. It is
            not modified.

    Returns:
        The Plotly figure, with one bar group per channel and one bar per
        discrimination label present in ``df``.
    """
    # Apply the same "Non Discriminative" -> "Non-Discriminative" spelling fix
    # that create_sentiment_distribution_chart performs, but on a local copy:
    # that helper is not called on the Channel Analysis page, so the raw
    # spelling can still be present when this chart is drawn.
    labels = df['Discrimination'].replace(
        {"Non Discriminative": "Non-Discriminative"}
    )

    # Long form (one row per channel/label pair) lets Plotly draw whichever
    # labels exist; naming fixed columns fails when one of them is filtered out.
    counts = (
        df.assign(Discrimination=labels)
        .groupby(['Channel', 'Discrimination'])
        .size()
        .reset_index(name='Count')
    )

    fig = px.bar(
        counts, x='Channel', y='Count', color='Discrimination', barmode='group'
    )
    fig.update_layout(
        title='Channel-wise Distribution of Discriminative Content',
        margin=dict(l=20, r=20, t=40, b=20),
    )
    return fig
def render_dashboard(page, df_filtered):
    """Render the dashboard page selected in the sidebar.

    Args:
        page: One of ``"Overview"``, ``"Sentiment Analysis"``,
            ``"Discrimination Analysis"`` or ``"Channel Analysis"``. Any
            other value renders nothing.
        df_filtered: The DataFrame, already narrowed by the sidebar filters,
            that every chart on the page is built from.
    """
    if page == "Overview":
        st.title("Overview Dashboard")
        # Create 2x2 grid for overview visualizations.
        # st.columns is used throughout: the old st.beta_columns alias was
        # removed from Streamlit, and the Channel Analysis branch already
        # relies on st.columns.
        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(create_pie_chart(df_filtered, 'Domain', 'Distribution of Domains'))
        with col2:
            st.plotly_chart(create_gender_ethnicity_distribution_chart(df_filtered))
        col3, col4 = st.columns(2)
        with col3:
            st.plotly_chart(create_sentiment_distribution_chart(df_filtered))
        with col4:
            st.plotly_chart(create_sentiment_discrimination_grouped_chart(df_filtered))
    elif page == "Sentiment Analysis":
        st.title("Sentiment Analysis Dashboard")
        # Placeholder: no charts are implemented for this page yet.
        # Example: st.plotly_chart(create_some_other_chart(df_filtered))
    elif page == "Discrimination Analysis":
        st.title("Discrimination Analysis Dashboard")
        # Placeholder: no charts are implemented for this page yet.
        # Example: st.plotly_chart(create_another_chart(df_filtered))
    elif page == "Channel Analysis":
        st.title("Channel Analysis Dashboard")
        # Create visualizations for the channel analysis page
        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(create_channel_sentiment_over_time_chart(df_filtered))
        with col2:
            st.plotly_chart(create_channel_discrimination_chart(df_filtered))
# Render the dashboard with filtered data: `page` comes from the sidebar
# selectbox and `df_filtered` from the sidebar multiselect filters.
render_dashboard(page, df_filtered)
|