"""Streamlit dashboard over reviewed English and Tamil social-media/news data.

The script loads four CSV files, cleans the ``Domain``, ``Sentiment`` and
``Discrimination`` label columns, exposes sidebar filters, and renders one of
several dashboard pages built from Plotly Express charts.
"""

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
import seaborn as sns
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go

# Set page configuration (must be the first Streamlit command in the script).
st.set_page_config(layout="wide")

# CSV files that are concatenated into the single dashboard dataset.
DATA_FILES = (
    "data/reviewed_social_media_english.csv",
    "data/reviewed_news_english.csv",
    "data/tamil_social_media.csv",
    "data/tamil_news.csv",
)


# Function to load and clean data
def load_and_clean_data():
    """Load every file in ``DATA_FILES`` and return one cleaned DataFrame.

    Cleaning steps:
      * unify spelling variants of ``Domain`` labels,
      * turn placeholder strings in ``Domain``/``Sentiment`` into missing values,
      * unify "Non Discriminative" with "Non-Discriminative" so that the
        sidebar options and every chart see a single label,
      * drop rows that have no ``Domain`` or no ``Sentiment``.

    Returns:
        pandas.DataFrame: the combined, cleaned rows with a fresh 0..n-1 index.
    """
    frames = [pd.read_csv(path) for path in DATA_FILES]

    # ignore_index avoids duplicated row labels coming from the four sources.
    df_combined = pd.concat(frames, ignore_index=True)

    df_combined['Domain'] = df_combined['Domain'].replace(
        {"MUSLIM": "Muslim", "nan": pd.NA, "None": pd.NA, "Other-Ethnic": "Other-Ethnicity"}
    )
    df_combined['Sentiment'] = df_combined['Sentiment'].replace(
        {"nan": pd.NA, "None": pd.NA, "No": pd.NA}
    )
    # Normalise once at load time instead of as a side effect of a chart helper.
    df_combined['Discrimination'] = df_combined['Discrimination'].replace(
        {"Non Discriminative": "Non-Discriminative"}
    )

    # Drop rows with NA values in 'Domain' and 'Sentiment'
    df_combined = df_combined.dropna(subset=['Domain', 'Sentiment'])
    return df_combined


df = load_and_clean_data()

# Sidebar Filters
domain_options = df['Domain'].dropna().unique()
channel_options = df['Channel'].dropna().unique()
sentiment_options = df['Sentiment'].dropna().unique()
discrimination_options = df['Discrimination'].dropna().unique()

domain_filter = st.sidebar.multiselect('Select Domain', options=domain_options, default=domain_options)
channel_filter = st.sidebar.multiselect('Select Channel', options=channel_options, default=channel_options)
sentiment_filter = st.sidebar.multiselect('Select Sentiment', options=sentiment_options, default=sentiment_options)
discrimination_filter = st.sidebar.multiselect(
    'Select Discrimination', options=discrimination_options, default=discrimination_options
)

# Apply filters. Rows whose Channel or Discrimination is missing never match
# an option and are therefore excluded here.
df_filtered = df[
    (df['Domain'].isin(domain_filter))
    & (df['Channel'].isin(channel_filter))
    & (df['Sentiment'].isin(sentiment_filter))
    & (df['Discrimination'].isin(discrimination_filter))
]

# Define a color palette for consistent visualization styles
color_palette = px.colors.sequential.Viridis

# Page navigation
page = st.sidebar.selectbox(
    "Choose a page",
    ["Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"],
)


# Visualisation for Domain Distribution
def create_pie_chart(df, column, title):
    """Return a donut chart of the value distribution of ``column``.

    Args:
        df: DataFrame holding the data to plot.
        column: name of the column whose values become the pie sectors.
        title: chart title.

    Returns:
        The Plotly figure, coloured with the module-level ``color_palette``.
    """
    fig = px.pie(df, names=column, title=title, hole=0.35)
    fig.update_layout(margin=dict(l=20, r=20, t=30, b=20), legend=dict(x=0.1, y=1), font=dict(size=12))
    fig.update_traces(marker=dict(colors=color_palette))
    return fig


# Visualization for Distribution of Gender versus Ethnicity
def create_gender_ethnicity_distribution_chart(df):
    """Return a donut chart comparing gender-related and ethnicity domains.

    Rows whose ``Domain`` is "Women" or "LGBTQIA+" are labelled
    "Gender: Women & LGBTQIA+"; every other row is labelled "Ethnicity".
    The input DataFrame is not modified.
    """
    is_gender = df['Domain'].isin(["Women", "LGBTQIA+"])
    # assign() works on a copy, so the caller's (filtered) frame gains no column.
    labelled = df.assign(
        GenderOrEthnicity=is_gender.map({True: "Gender: Women & LGBTQIA+", False: "Ethnicity"})
    )
    fig = px.pie(labelled, names='GenderOrEthnicity', title='Distribution of Gender versus Ethnicity', hole=0.35)
    fig.update_layout(margin=dict(l=20, r=20, t=30, b=20), legend=dict(x=0.1, y=1), font=dict(size=12))
    return fig


# Visualization for Sentiment Distribution Across Domains
def create_sentiment_distribution_chart(df):
    """Return a stacked bar chart of row counts per ``Domain`` and ``Sentiment``."""
    domain_counts = df.groupby(['Domain', 'Sentiment']).size().reset_index(name='counts')
    fig = px.bar(
        domain_counts,
        x='Domain',
        y='counts',
        color='Sentiment',
        title="Sentiment Distribution Across Domains",
        barmode='stack',
    )
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis_title="Domain",
        yaxis_title="Counts",
        font=dict(size=12),
    )
    return fig


# Visualization for Correlation between Sentiment and Discrimination
def create_sentiment_discrimination_grouped_chart(df):
    """Return a grouped bar chart of ``Sentiment`` versus ``Discrimination``.

    The counts are built in long form from whatever discrimination labels are
    present, so the chart keeps working when a label is filtered out.
    """
    pair_counts = df.groupby(['Sentiment', 'Discrimination']).size().reset_index(name='Count')
    fig = px.bar(
        pair_counts,
        x='Sentiment',
        y='Count',
        color='Discrimination',
        barmode='group',
        title="Sentiment vs. Discrimination",
    )
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis_title="Sentiment",
        yaxis_title="Count",
        font=dict(size=12),
    )
    return fig


# Function for Channel-wise Sentiment Over Time Chart
def create_channel_sentiment_over_time_chart(df):
    """Return a line chart of monthly row counts per channel and sentiment.

    ``Date`` values are parsed with ``pd.to_datetime``; values that cannot be
    parsed are skipped. Each channel gets its own colour and each sentiment
    its own dash style. The input DataFrame is not modified.
    """
    parsed_dates = pd.to_datetime(df['Date'], errors='coerce')
    # Bucket by calendar month and convert back to timestamps so Plotly gets a
    # real date axis instead of Period objects.
    month_start = parsed_dates.dt.to_period('M').dt.to_timestamp()

    timeline = (
        df.assign(Month=month_start)
        .dropna(subset=['Month'])
        .groupby(['Month', 'Channel', 'Sentiment'])
        .size()
        .reset_index(name='Count')
    )
    fig = px.line(timeline, x='Month', y='Count', color='Channel', line_dash='Sentiment')
    fig.update_layout(title='Channel-wise Sentiment Over Time', margin=dict(l=20, r=20, t=40, b=20))
    return fig


# Function for Channel-wise Distribution of Discriminative Content Chart
def create_channel_discrimination_chart(df):
    """Return a grouped bar chart of ``Discrimination`` counts per ``Channel``.

    Uses the discrimination labels actually present in ``df`` rather than a
    fixed pair of column names.
    """
    channel_discrimination = df.groupby(['Channel', 'Discrimination']).size().reset_index(name='Count')
    fig = px.bar(channel_discrimination, x='Channel', y='Count', color='Discrimination', barmode='group')
    fig.update_layout(
        title='Channel-wise Distribution of Discriminative Content',
        margin=dict(l=20, r=20, t=40, b=20),
    )
    return fig


# Function for rendering dashboard
def render_dashboard(page, df_filtered):
    """Render the dashboard page named ``page`` from the filtered data.

    Args:
        page: one of "Overview", "Sentiment Analysis",
            "Discrimination Analysis" or "Channel Analysis".
        df_filtered: DataFrame after the sidebar filters were applied.
    """
    if page == "Overview":
        st.title("Overview Dashboard")
        if df_filtered.empty:
            st.warning("No data matches the selected filters.")
            return
        # Create 2x2 grid for overview visualizations
        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(create_pie_chart(df_filtered, 'Domain', 'Distribution of Domains'))
        with col2:
            st.plotly_chart(create_gender_ethnicity_distribution_chart(df_filtered))
        col3, col4 = st.columns(2)
        with col3:
            st.plotly_chart(create_sentiment_distribution_chart(df_filtered))
        with col4:
            st.plotly_chart(create_sentiment_discrimination_grouped_chart(df_filtered))
    elif page == "Sentiment Analysis":
        st.title("Sentiment Analysis Dashboard")
        # Implement sentiment analysis visualizations here
    elif page == "Discrimination Analysis":
        st.title("Discrimination Analysis Dashboard")
        # Implement discrimination analysis visualizations here
    elif page == "Channel Analysis":
        st.title("Channel Analysis Dashboard")
        if df_filtered.empty:
            st.warning("No data matches the selected filters.")
            return
        # Create visualizations for the channel analysis page
        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(create_channel_sentiment_over_time_chart(df_filtered))
        with col2:
            st.plotly_chart(create_channel_discrimination_chart(df_filtered))


# Render the selected dashboard page
render_dashboard(page, df_filtered)