File size: 5,359 Bytes
21ac434 7bf8be4 93b4f33 68a2713 93b4f33 68a2713 93b4f33 68a2713 93b4f33 7bf8be4 68a2713 7bf8be4 68a2713 7bf8be4 68a2713 7bf8be4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
import seaborn as sns
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
# Configure the Streamlit page to use the "wide" layout.
st.set_page_config(layout="wide")
def load_and_clean_data(data_dir="data"):
    """Load the four review CSV files, merge them and drop unusable rows.

    Args:
        data_dir: Directory that contains the CSV files. Defaults to
            ``"data"``, the location this function has always read from.

    Returns:
        pandas.DataFrame: The concatenated rows with a fresh 0..n-1 index.
        ``"MUSLIM"`` in ``Domain`` is rewritten to ``"Muslim"``; rows whose
        ``Domain`` is ``'Not relevant'`` are removed, as are rows whose
        ``Domain``, ``Discrimination`` or ``Sentiment`` is ``'None'`` or
        missing.
    """
    from pathlib import Path

    file_names = (
        "reviewed_social_media_english.csv",
        "reviewed_news_english.csv",
        "tamil_social_media.csv",
        "tamil_news.csv",
    )

    # Load data
    frames = [pd.read_csv(Path(data_dir) / name) for name in file_names]

    # Concatenate dataframes. Each file's index restarts at 0, so
    # ignore_index keeps row labels unique in the combined frame.
    df_combined = pd.concat(frames, ignore_index=True)

    # Normalize Text
    df_combined['Domain'] = df_combined['Domain'].replace("MUSLIM", "Muslim")

    # Drop irrelevant data
    df_combined = df_combined[df_combined['Domain'] != 'Not relevant']

    # Recent pandas versions parse the text "None" as NaN while reading a
    # CSV, and NaN != 'None' is True, so comparing against the string alone
    # would let those rows through. Treat both spellings as "no label".
    for column in ('Domain', 'Discrimination', 'Sentiment'):
        has_label = df_combined[column].notna() & (df_combined[column] != 'None')
        df_combined = df_combined[has_label]

    return df_combined.reset_index(drop=True)
# Read the cleaned dataset once; everything below works from this frame.
df = load_and_clean_data()

# Sidebar filters: each filter offers every distinct value of its column.
domain_options = df['Domain'].unique()
channel_options = df['Channel'].unique()
sentiment_options = df['Sentiment'].unique()
discrimination_options = df['Discrimination'].unique()

# All values start out selected, so nothing is filtered until the user
# removes an entry.
domain_filter = st.sidebar.multiselect(
    'Select Domain',
    options=domain_options,
    default=domain_options,
)
channel_filter = st.sidebar.multiselect(
    'Select Channel',
    options=channel_options,
    default=channel_options,
)
sentiment_filter = st.sidebar.multiselect(
    'Select Sentiment',
    options=sentiment_options,
    default=sentiment_options,
)
discrimination_filter = st.sidebar.multiselect(
    'Select Discrimination',
    options=discrimination_options,
    default=discrimination_options,
)

# A row is kept only when its value is selected in all four filters.
selected_rows = (
    df['Domain'].isin(domain_filter)
    & df['Channel'].isin(channel_filter)
    & df['Sentiment'].isin(sentiment_filter)
    & df['Discrimination'].isin(discrimination_filter)
)
df_filtered = df[selected_rows]

# Page chosen by the user in the sidebar.
page = st.sidebar.selectbox(
    "Choose a page",
    ["Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"],
)

# Shared colour palette so the charts use a consistent style.
color_palette = px.colors.sequential.Viridis
# Visualization function
def create_visualizations(df):
    """Placeholder for the page visualizations; currently does nothing.

    Args:
        df: Data the visualizations are meant to be built from (unused).

    Returns:
        None.
    """
    # [Existing visualization code]
    return None
# Page routing.
# `page` already holds the choice made in the "Choose a page" selectbox
# created earlier in this script. Creating a second selectbox with the same
# label and options makes Streamlit raise a duplicate-widget error, so the
# existing selection is reused here. `color_palette` is likewise already
# defined earlier and is not reassigned.
# The placeholder calls receive `df_filtered` so that the sidebar filters
# apply to whatever each page eventually draws.
if page == "Overview":
    create_visualizations(df_filtered)  # Placeholder for overview visualizations
elif page == "Sentiment Analysis":
    create_visualizations(df_filtered)  # Placeholder for sentiment analysis visualizations
elif page == "Discrimination Analysis":
    create_visualizations(df_filtered)  # Placeholder for discrimination analysis visualizations
elif page == "Channel Analysis":
    create_visualizations(df_filtered)  # Placeholder for channel analysis visualizations
# [Place the rest of the code for the visualizations here]
# Function for Domain Distribution Chart
def create_domain_distribution_chart(df):
    """Build a donut chart of how the rows are distributed over ``Domain``.

    Args:
        df: DataFrame with a ``Domain`` column.

    Returns:
        The Plotly figure created by ``px.pie``.
    """
    layout_options = {
        'title_x': 0.5,
        'margin': {'l': 20, 'r': 20, 't': 30, 'b': 20},
        'legend': {'x': 0.1, 'y': 1},
    }
    donut = px.pie(df, names='Domain', title='Distribution of Domains', hole=0.35)
    donut.update_layout(**layout_options)
    # Slice colours are taken from the module-level palette.
    donut.update_traces(marker={'colors': color_palette})
    return donut
# Function for Sentiment Distribution Across Domains Chart
def create_sentiment_distribution_chart(df):
    """Build a grouped bar chart of sentiment counts for each domain.

    NOTE(review): the chart-building code was elided in the original, which
    left ``fig`` undefined and made every call raise ``NameError``. The
    grouped bar chart below is a reconstruction based on the function's
    name; confirm it matches the intended chart.

    Args:
        df: DataFrame with ``Domain`` and ``Sentiment`` columns.

    Returns:
        The Plotly figure created by ``px.bar``.
    """
    # One row per (Domain, Sentiment) pair with the number of matching rows.
    counts = (
        df.groupby(['Domain', 'Sentiment'])
        .size()
        .reset_index(name='Count')
    )
    fig = px.bar(
        counts,
        x='Domain',
        y='Count',
        color='Sentiment',
        barmode='group',
        title='Sentiment Distribution Across Domains',
    )
    fig.update_layout(margin=dict(l=20, r=20, t=40, b=20))
    return fig
# ... [Define other chart functions following the same pattern]
# Function for Channel-wise Sentiment Over Time Chart
def create_channel_sentiment_over_time_chart(df):
    """Build a line chart of monthly row counts per channel and sentiment.

    The input frame is not modified: the parsed dates live in a temporary
    column on a copy instead of overwriting ``df['Date']``.

    Args:
        df: DataFrame with ``Date``, ``Channel`` and ``Sentiment`` columns.
            ``Date`` must be parseable by ``pd.to_datetime``.

    Returns:
        The Plotly figure created by ``px.line``: one line per
        (channel, sentiment) pair, coloured by channel and dashed by
        sentiment.
    """
    # Bucket every row into the first day of its calendar month.
    month_start = pd.to_datetime(df['Date']).dt.to_period('M').dt.to_timestamp()

    # Long-form counts (one row per Month/Channel/Sentiment). Plotly Express
    # cannot plot a MultiIndex frame, and long form also avoids hard-coding
    # sentiment column names that may be absent after filtering.
    timeline = (
        df.assign(Month=month_start)
        .groupby(['Month', 'Channel', 'Sentiment'])
        .size()
        .reset_index(name='Count')
    )
    fig = px.line(
        timeline,
        x='Month',
        y='Count',
        color='Channel',
        line_dash='Sentiment',
    )
    fig.update_layout(title='Channel-wise Sentiment Over Time', margin=dict(l=20, r=20, t=40, b=20))
    return fig
# Function for Channel-wise Distribution of Discriminative Content Chart
def create_channel_discrimination_chart(df):
    """Build a grouped bar chart of discrimination labels per channel.

    Args:
        df: DataFrame with ``Channel`` and ``Discrimination`` columns.

    Returns:
        The Plotly figure created by ``px.bar``, with one bar group per
        channel and one bar for each of ``'Discriminative'`` and
        ``'Non-Discriminative'``.
    """
    categories = ['Discriminative', 'Non-Discriminative']
    channel_discrimination = (
        df.groupby(['Channel', 'Discrimination'])
        .size()
        .unstack(fill_value=0)
        # Guarantee both plotted columns exist: when a label is absent from
        # the data (e.g. filtered out), it is shown as zero instead of
        # failing on a missing column.
        .reindex(columns=categories, fill_value=0)
    )
    fig = px.bar(
        channel_discrimination,
        x=channel_discrimination.index,
        y=categories,
        barmode='group',
    )
    fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=20, r=20, t=40, b=20))
    return fig
# Dashboard Layout
def render_dashboard(data=None):
    """Render the charts for the page currently selected in the sidebar.

    Reads the module-level ``page`` to decide which layout to draw.

    Args:
        data: DataFrame to plot. When omitted, the module-level ``df`` is
            used, which is what the body plotted before this parameter
            existed.

    Returns:
        None. The charts are written to the Streamlit page.
    """
    if data is None:
        data = df

    # `st.beta_columns` was renamed to `st.columns` and later removed from
    # Streamlit; prefer the current name and fall back for old installs.
    make_columns = getattr(st, "columns", None) or st.beta_columns

    # Overview page layout
    if page == "Overview":
        st.header("Overview of Domains and Sentiments")
        col1, col2 = make_columns(2)
        with col1:
            st.plotly_chart(create_domain_distribution_chart(data))
        with col2:
            st.plotly_chart(create_sentiment_distribution_chart(data))
        # ... [Additional overview charts]
    # ... [Other pages]
# Render the dashboard, passing the rows selected by the sidebar filters
# (df_filtered).
render_dashboard(df_filtered)
|