File size: 5,359 Bytes
21ac434
7bf8be4
 
 
 
 
 
 
 
 
 
 
93b4f33
68a2713
93b4f33
 
 
 
 
68a2713
 
 
 
 
 
 
 
 
 
 
 
93b4f33
 
68a2713
93b4f33
7bf8be4
68a2713
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bf8be4
68a2713
 
7bf8be4
68a2713
 
7bf8be4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
import seaborn as sns
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go

# Set page configuration.
# This is the first Streamlit call in the script; layout="wide" requests
# Streamlit's wide page layout instead of the default one.
st.set_page_config(layout="wide")

def load_and_clean_data():
    """Load the four review CSV files and return one cleaned DataFrame.

    The English/Tamil social-media and news files under ``data/`` are read
    and stacked. The ``Domain`` label "MUSLIM" is normalised to "Muslim",
    and rows are dropped when:

    * ``Domain`` is "Not relevant", or
    * ``Domain``, ``Discrimination`` or ``Sentiment`` is unlabelled, i.e.
      holds the literal text "None" or a missing value (NaN).

    The NaN check matters because ``pandas.read_csv`` may already have
    converted the text "None" into NaN while parsing (it is one of the
    default ``na_values`` in recent pandas releases); comparing against the
    string "None" alone would then silently keep those rows.

    Returns:
        pandas.DataFrame: the combined, filtered rows with a fresh
        0..n-1 index.

    Raises:
        FileNotFoundError: if any of the CSV files is missing (raised by
            ``pandas.read_csv``).
        KeyError: if a required label column is absent from the data.
    """
    source_files = (
        "data/reviewed_social_media_english.csv",
        "data/reviewed_news_english.csv",
        "data/tamil_social_media.csv",
        "data/tamil_news.csv",
    )

    # ignore_index avoids repeated row labels coming from the separate files.
    df_combined = pd.concat(
        [pd.read_csv(path) for path in source_files],
        ignore_index=True,
    )

    # Normalize Text
    df_combined['Domain'] = df_combined['Domain'].replace("MUSLIM", "Muslim")

    # Drop irrelevant data
    keep = df_combined['Domain'] != 'Not relevant'
    for column in ('Domain', 'Discrimination', 'Sentiment'):
        labels = df_combined[column]
        # Treat both the literal "None" and a parsed-missing value as unlabelled.
        keep &= labels.notna() & (labels != 'None')

    # reset_index also hands callers an independent frame rather than a slice.
    return df_combined[keep].reset_index(drop=True)

# Load and clean data
df = load_and_clean_data()

# Sidebar filter choices: the distinct values found in each filterable column.
domain_options = df['Domain'].unique()
channel_options = df['Channel'].unique()
sentiment_options = df['Sentiment'].unique()
discrimination_options = df['Discrimination'].unique()

# One sidebar multiselect per column, with every option pre-selected.
domain_filter = st.sidebar.multiselect(
    'Select Domain',
    options=domain_options,
    default=domain_options,
)
channel_filter = st.sidebar.multiselect(
    'Select Channel',
    options=channel_options,
    default=channel_options,
)
sentiment_filter = st.sidebar.multiselect(
    'Select Sentiment',
    options=sentiment_options,
    default=sentiment_options,
)
discrimination_filter = st.sidebar.multiselect(
    'Select Discrimination',
    options=discrimination_options,
    default=discrimination_options,
)

# Keep only the rows whose value is selected in all four filters.
_domain_selected = df['Domain'].isin(domain_filter)
_channel_selected = df['Channel'].isin(channel_filter)
_sentiment_selected = df['Sentiment'].isin(sentiment_filter)
_discrimination_selected = df['Discrimination'].isin(discrimination_filter)
df_filtered = df[
    _domain_selected
    & _channel_selected
    & _sentiment_selected
    & _discrimination_selected
]

# Page navigation
page = st.sidebar.selectbox(
    "Choose a page",
    ["Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"],
)

# Shared color palette so every chart uses the same visual style.
color_palette = px.colors.sequential.Viridis

# Visualization function
def create_visualizations(df):
    """Placeholder hook for page visualizations.

    Accepts the data to visualize but currently draws nothing.

    Args:
        df: the data a future implementation would chart; unused for now.

    Returns:
        None.
    """
    # [Existing visualization code]
    return None

# Page dispatch
# `page` is already bound by the "Choose a page" sidebar selectbox created
# earlier in this script. A second selectbox with identical arguments and no
# explicit `key` would get the same auto-generated widget identity, which
# Streamlit rejects as a duplicate widget, so the existing selection is reused.
if page == "Overview":
    create_visualizations(df)  # Placeholder for overview visualizations
elif page == "Sentiment Analysis":
    create_visualizations(df)  # Placeholder for sentiment analysis visualizations
elif page == "Discrimination Analysis":
    create_visualizations(df)  # Placeholder for discrimination analysis visualizations
elif page == "Channel Analysis":
    create_visualizations(df)  # Placeholder for channel analysis visualizations

# [Place the rest of the code for the visualizations here]

# `color_palette` (px.colors.sequential.Viridis) is likewise already defined
# earlier in this script, so it is not re-assigned here.

# Function for Domain Distribution Chart
def create_domain_distribution_chart(df):
    """Build a donut-style pie chart of the ``Domain`` column.

    Args:
        df: data containing a ``Domain`` column; passed straight to
            ``px.pie`` with ``names='Domain'``.

    Returns:
        The Plotly figure, with the title positioned at x=0.5, tight
        margins, a repositioned legend and slice colors taken from the
        module-level ``color_palette``.
    """
    layout_options = {
        'title_x': 0.5,
        'margin': {'l': 20, 'r': 20, 't': 30, 'b': 20},
        'legend': {'x': 0.1, 'y': 1},
    }
    slice_marker = {'colors': color_palette}

    # hole=0.35 cuts out the centre of the pie, giving the donut shape.
    donut = px.pie(df, names='Domain', title='Distribution of Domains', hole=0.35)
    donut.update_layout(**layout_options)
    donut.update_traces(marker=slice_marker)
    return donut

# Function for Sentiment Distribution Across Domains Chart
def create_sentiment_distribution_chart(df):
    """Build a grouped bar chart of sentiment counts per domain.

    The previous body referenced ``fig`` without ever creating it, so any
    call raised ``NameError``. The chart below is a reconstruction of the
    elided code: it counts rows per (Domain, Sentiment) pair and draws one
    bar per sentiment within each domain.
    NOTE(review): the original chart code was not available here; confirm
    that a grouped bar chart matches the intended design.

    Args:
        df: data containing ``Domain`` and ``Sentiment`` columns.

    Returns:
        The Plotly figure.

    Raises:
        KeyError: if ``Domain`` or ``Sentiment`` is missing from ``df``.
    """
    # Long-form counts: one row per (Domain, Sentiment) combination present.
    counts = (
        df.groupby(['Domain', 'Sentiment'])
        .size()
        .reset_index(name='Count')
    )
    fig = px.bar(
        counts,
        x='Domain',
        y='Count',
        color='Sentiment',
        barmode='group',
        title='Sentiment Distribution Across Domains',
    )
    fig.update_layout(margin=dict(l=20, r=20, t=40, b=20))
    return fig

# ... [Define other chart functions following the same pattern]

# Function for Channel-wise Sentiment Over Time Chart
def create_channel_sentiment_over_time_chart(df):
    """Build a line chart of monthly post counts per channel and sentiment.

    Rows are bucketed by calendar month of ``Date`` and counted per
    (month, Channel, Sentiment). Each channel gets its own line color and
    each sentiment its own dash style.

    Compared with the earlier version this:
    * no longer overwrites the caller's ``Date`` column,
    * uses the month bucket (not the Channel index level) for the x axis,
    * plots whichever sentiment labels are present instead of requiring
      hard-coded 'Positive'/'Negative'/'Neutral' columns.

    Args:
        df: data containing ``Date`` (parseable by ``pd.to_datetime``),
            ``Channel`` and ``Sentiment`` columns.

    Returns:
        The Plotly figure.

    Raises:
        KeyError: if a required column is missing from ``df``.
    """
    # Represent each month by the timestamp of its first day so Plotly
    # receives ordinary datetimes rather than pandas Period objects.
    month_start = pd.to_datetime(df['Date']).dt.to_period('M').dt.to_timestamp()

    # assign() returns a new frame, leaving the caller's data untouched.
    monthly = df.assign(Date=month_start)
    counts = (
        monthly.groupby(['Date', 'Channel', 'Sentiment'])
        .size()
        .reset_index(name='Count')
    )

    fig = px.line(counts, x='Date', y='Count', color='Channel', line_dash='Sentiment')
    fig.update_layout(title='Channel-wise Sentiment Over Time', margin=dict(l=20, r=20, t=40, b=20))
    return fig

# Function for Channel-wise Distribution of Discriminative Content Chart
def create_channel_discrimination_chart(df):
    """Build a grouped bar chart of discrimination labels per channel.

    Rows are counted per (Channel, Discrimination) pair and drawn as one bar
    per discrimination label within each channel. The labels come from the
    data itself, so the chart still renders when a label is absent (for
    example after it has been filtered out) instead of failing on a
    hard-coded column name.

    Args:
        df: data containing ``Channel`` and ``Discrimination`` columns.

    Returns:
        The Plotly figure.

    Raises:
        KeyError: if ``Channel`` or ``Discrimination`` is missing from ``df``.
    """
    # Long-form counts: one row per (Channel, Discrimination) combination present.
    counts = (
        df.groupby(['Channel', 'Discrimination'])
        .size()
        .reset_index(name='Count')
    )
    fig = px.bar(counts, x='Channel', y='Count', color='Discrimination', barmode='group')
    fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=20, r=20, t=40, b=20))
    return fig

# Dashboard Layout
def render_dashboard(data=None):
    """Render the dashboard page currently selected in the sidebar.

    Previously this function accepted no arguments, so the call below that
    passes the filtered frame raised ``TypeError``, and the charts always
    used the unfiltered module-level ``df``.

    Args:
        data: the DataFrame to chart. Defaults to the module-level ``df``
            so that a bare ``render_dashboard()`` call keeps working.
    """
    if data is None:
        data = df

    # `st.beta_columns` was renamed to `st.columns`; prefer the current name
    # and fall back only on Streamlit versions that predate the rename.
    make_columns = getattr(st, "columns", None) or st.beta_columns

    # Overview page layout
    if page == "Overview":
        st.header("Overview of Domains and Sentiments")
        col1, col2 = make_columns(2)
        with col1:
            st.plotly_chart(create_domain_distribution_chart(data))
        with col2:
            st.plotly_chart(create_sentiment_distribution_chart(data))
        # ... [Additional overview charts]

    # ... [Other pages]


# Render the dashboard with filtered data
render_dashboard(df_filtered)