menikev commited on
Commit
863daa6
1 Parent(s): f707acd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -77
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import streamlit as st
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
@@ -37,7 +38,7 @@ df = load_and_clean_data()
37
 
38
 
39
  # Page navigation setup
40
- page_names = ["Analytics Dashboard for Domain Predictions", "GESI Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"]
41
  page = st.sidebar.selectbox("Choose a page", page_names)
42
 
43
  # Sidebar Filters
@@ -51,7 +52,6 @@ channel_filter = st.sidebar.multiselect('Select Channel', options=channel_option
51
  sentiment_filter = st.sidebar.multiselect('Select Sentiment', options=sentiment_options, default=sentiment_options)
52
  discrimination_filter = st.sidebar.multiselect('Select Discrimination', options=discrimination_options, default=discrimination_options)
53
 
54
-
55
  # Apply filters
56
  df_filtered = df[(df['Domain'].isin(domain_filter)) &
57
  (df['Channel'].isin(channel_filter)) &
@@ -63,67 +63,52 @@ color_palette = px.colors.sequential.Viridis
63
 
64
  # Function to render the model prediction visualization page
65
  def render_prediction_page():
66
- st.title("Streamlit Analytics Dashboard for Model Predictions")
67
- st.write("""
68
- Welcome to the interactive analytics dashboard that brings to life the nuanced assessment of textual content.
69
- Dive into the insightful world of language processing where each sentence you enter is meticulously evaluated
70
- for its domain relevance and sentiment connotation.
71
- Instant Analysis: Enter any text snippet and get immediate predictions with our sophisticated model that assesses content with nuanced precision.
72
- Domain Identification: Discover the domain categorization of your text, providing clarity on the subject matter with a quantifiable domain score.
73
- """)
74
-
75
- # User input text area
76
- user_input = st.text_area("Enter Text/Content here to analyze", height=150)
77
 
78
- if st.button("Perform Contextual Analysis"):
79
- # Use run_pipeline to get predictions
80
- prediction = run_pipeline(user_input)
81
-
82
- # Extract prediction details
83
- domain_label = prediction.get("domain_label", "Unknown")
84
- domain_score = prediction.get("domain_score", 0)
85
- discrimination_label = prediction.get("discrimination_label", "Unknown")
86
- discrimination_score = prediction.get("discrimination_score", 0)
87
-
88
- # Visualization layout
89
- col1, col2 = st.columns(2)
90
-
91
- with col1:
92
- st.markdown("#### Domain Label")
93
- st.markdown(f"## {domain_label}")
94
- st.progress(domain_score)
95
-
96
- with col2:
97
- st.markdown("#### Discrimination Label")
98
- st.markdown(f"## {discrimination_label}")
99
- st.progress(discrimination_score)
100
-
101
- col3, col4 = st.columns(2)
102
-
103
- with col3:
104
- # Domain Score Gauge
105
- fig_domain = go.Figure(go.Indicator(
106
- mode="gauge+number",
107
- value=domain_score,
108
- domain={'x': [0, 1], 'y': [0, 1]},
109
- title={'text': "Domain Score"},
110
- gauge={'axis': {'range': [None, 1]}}))
111
- st.plotly_chart(fig_domain, use_container_width=True)
112
-
113
- with col4:
114
- # Discrimination Score Gauge
115
- fig_discrimination = go.Figure(go.Indicator(
116
- mode="gauge+number",
117
- value=discrimination_score,
118
- domain={'x': [0, 1], 'y': [0, 1]},
119
- title={'text': "Discrimination Score"},
120
- gauge={'axis': {'range': [None, 1]}}))
121
- st.plotly_chart(fig_discrimination, use_container_width=True)
122
-
123
  # Visualisation for Domain Distribution
124
  def create_pie_chart(df, column, title):
125
  fig = px.pie(df, names=column, title=title, hole=0.35)
126
- fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), legend=dict(x=0.1, y=1), font=dict(size=10))
127
  fig.update_traces(marker=dict(colors=color_palette))
128
  return fig
129
 
@@ -131,33 +116,34 @@ def create_pie_chart(df, column, title):
131
  def create_gender_ethnicity_distribution_chart(df):
132
  df['GenderOrEthnicity'] = df['Domain'].apply(lambda x: "Gender: Women & LGBTQIA+" if x in ["Women", "LGBTQIA+"] else "Ethnicity")
133
  fig = px.pie(df, names='GenderOrEthnicity', title='Distribution of Gender versus Ethnicity', hole=0.35)
134
- fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), legend=dict(x=0.1, y=1), font=dict(size=10))
135
  return fig
136
 
137
  # Visualization for Sentiment Distribution Across Domains
138
  def create_sentiment_distribution_chart(df):
139
  domain_counts = df.groupby(['Domain', 'Sentiment']).size().reset_index(name='counts')
140
  domain_counts = domain_counts.sort_values('counts')
141
-
142
- # Reverse the color scheme
143
  color_map = {'Negative': 'red', 'Positive': 'blue', 'Neutral': 'lightblue'}
144
-
145
  fig = px.bar(domain_counts, x='Domain', y='counts', color='Sentiment', color_discrete_map=color_map,
146
  title="Sentiment Distribution Across Domains", barmode='stack')
147
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Domain", yaxis_title="Counts", font=dict(size=10))
148
  return fig
149
 
 
150
  # Visualization for Correlation between Sentiment and Discrimination
151
  def create_sentiment_discrimination_grouped_chart(df):
152
  # Creating a crosstab of 'Sentiment' and 'Discrimination'
153
  crosstab_df = pd.crosstab(df['Sentiment'], df['Discrimination'])
154
-
155
  # Check if 'Yes' and 'No' are in the columns after the crosstab operation
156
  value_vars = crosstab_df.columns.intersection(['Discriminative', 'Non Discriminative']).tolist()
157
-
158
  # If 'No' is not in columns, it will not be included in melting
159
  melted_df = pd.melt(crosstab_df.reset_index(), id_vars='Sentiment', value_vars=value_vars, var_name='Discrimination', value_name='Count')
160
-
161
  # Proceeding to plot only if we have data to plot
162
  if not melted_df.empty:
163
  fig = px.bar(melted_df, x='Sentiment', y='Count', color='Discrimination', barmode='group', title="Sentiment vs. Discrimination")
@@ -166,6 +152,8 @@ def create_sentiment_discrimination_grouped_chart(df):
166
  else:
167
  return "No data to display for the selected filters."
168
 
 
 
169
  # Function for Top Domains with Negative Sentiment Chart
170
  def create_top_negative_sentiment_domains_chart(df):
171
  domain_counts = df.groupby(['Domain', 'Sentiment']).size().unstack(fill_value=0)
@@ -194,26 +182,27 @@ def create_key_phrases_negative_sentiment_chart(df):
194
  def create_key_phrases_positive_sentiment_chart(df):
195
  # Filter the DataFrame for positive sentiments and drop any rows with NaN in 'Content'
196
  positive_df = df[df['Sentiment'] == 'Positive'].dropna(subset=['Content'])
197
-
198
  # Create a CountVectorizer instance
199
  cv = CountVectorizer(ngram_range=(3, 3), stop_words='english')
200
-
201
  # Apply CountVectorizer only on non-null content
202
  trigrams = cv.fit_transform(positive_df['Content'])
203
-
204
  # Sum the frequency of each n-gram and create a DataFrame
205
  count_values = trigrams.toarray().sum(axis=0)
206
  ngram_freq = pd.DataFrame(sorted([(count_values[i], k) for k, i in cv.vocabulary_.items()], reverse=True))
207
  ngram_freq.columns = ['frequency', 'ngram']
208
-
209
  # Create the bar chart
210
  fig = px.bar(ngram_freq.head(10), x='frequency', y='ngram', orientation='h', title='Key phrases in Positive Sentiment Content')
211
-
212
- # Update layout settings to fit and look better
213
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Frequency", yaxis_title="Trigram", font=dict(size=10))
214
-
215
  return fig
216
-
 
217
  # Function for Prevalence of Discriminatory Content Chart
218
  def create_prevalence_discriminatory_content_chart(df):
219
  domain_counts = df.groupby(['Domain', 'Discrimination']).size().unstack(fill_value=0)
@@ -237,7 +226,7 @@ def create_top_discriminatory_domains_chart(df):
237
  def create_sentiment_distribution_by_channel_chart(df):
238
  sentiment_by_channel = df.groupby(['Channel', 'Sentiment']).size().reset_index(name='counts')
239
  color_map = {'Positive': 'blue', 'Neutral': 'lightblue', 'Negative': 'red'}
240
- fig = px.bar(sentiment_by_channel, x='Channel', y='counts', color='Sentiment', title="Sentiment Distribution by Channel", barmode='group', color_discrete_map=color_map)
241
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Channel", yaxis_title="Counts", font=dict(size=10), title_x=0.5)
242
  return fig
243
 
@@ -248,9 +237,10 @@ def create_channel_discrimination_chart(df):
248
  fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=20, r=20, t=50, b=20), font=dict(size=10), title_x=0.5)
249
  return fig
250
 
 
251
  # Function for rendering dashboard
252
  def render_dashboard(page, df_filtered):
253
- if page == "Analytics Dashboard for Domain Predictions":
254
  render_prediction_page()
255
  elif page == "GESI Overview":
256
  st.title(" GESI Overview Dashboard")
@@ -302,4 +292,4 @@ def render_dashboard(page, df_filtered):
302
 
303
 
304
  # Render the selected dashboard page
305
- render_dashboard(page, df_filtered)
 
1
+ import torch
2
  import streamlit as st
3
  import pandas as pd
4
  import matplotlib.pyplot as plt
 
38
 
39
 
40
  # Page navigation setup
41
# NOTE: the first entry must be spelled exactly like the string that
# render_dashboard() compares `page` against ("... Conversations ...", plural);
# with the singular spelling the prediction page branch is never taken.
page_names = [
    "Dashboard for GESI Conversations in Sri Lanka",
    "GESI Overview",
    "Sentiment Analysis",
    "Discrimination Analysis",
    "Channel Analysis",
]
page = st.sidebar.selectbox("Choose a page", page_names)
43
 
44
  # Sidebar Filters
 
52
  sentiment_filter = st.sidebar.multiselect('Select Sentiment', options=sentiment_options, default=sentiment_options)
53
  discrimination_filter = st.sidebar.multiselect('Select Discrimination', options=discrimination_options, default=discrimination_options)
54
 
 
55
  # Apply filters
56
  df_filtered = df[(df['Domain'].isin(domain_filter)) &
57
  (df['Channel'].isin(channel_filter)) &
 
63
 
64
  # Function to render the model prediction visualization page
65
  def render_prediction_page():
66
+ st.title("Dashboard for GESI Conversations in Sri Lanka")
67
+ st.write("""
68
+ Instant Analysis: Enter any text snippet and get immediate predictions from out model train on English, Sinhala and Tamil based languages \n\n
69
+ Domain Identification: Discover the subject matter of your text with a quantifiable domain score. """)
70
+
71
+ # User input text area
72
+ user_input = st.text_are("Enter Text/Content here to analyze", height=150)
73
+
74
+ if st.button("Perfrom contextual Analysis"):
75
+ # Use run_pipeline to get predictions
76
+ predictions = run_pipeline(user_input)
77
 
78
+ # Extract prediction details
79
+ domain_label = prediction.get("domain_label", "Unknown")
80
+ domain_score = prediction.get("domain_socre", 0)
81
+ discrimination_label = prediction.get("discrimination_label", "Unknown")
82
+ discrimination_score = prediction.get("discrimination_score", 0)
83
+
84
+ # Visualization layout
85
+ col1, col2 = st.columns(2)
86
+
87
+ with col1:
88
+ st.markdown("#### Domain Label")
89
+ st.markdown(f"## {domain_label}")
90
+ st.progress(domain_score)
91
+
92
+ with col2:
93
+ st.makrdown("#### Discrimination Label")
94
+ st.markdown(f"## {discrimination_label}")
95
+ st.progress(domain_score)
96
+
97
+ col3, col4 = st.columns(2)
98
+
99
+ with col3:
100
+ # Display Domain Score in Bold
101
+ st.markdown(f'**Domain Score: {domain_score:.2f}**', unsafe_allow_html=True)
102
+
103
+ with col4:
104
+ # Display Discrimination Score in Bold
105
+ st.markdown(f'**Discrimination Score: {discrimination_score:.2f}**', unsafe_allow_html=True)
106
+
107
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  # Visualisation for Domain Distribution
109
  def create_pie_chart(df, column, title):
110
  fig = px.pie(df, names=column, title=title, hole=0.35)
111
+ fig.update_layout(margin=dict(l=20, r=20, t=30, b=20), legend=dict(x=0.1, y=1), font=dict(size=12))
112
  fig.update_traces(marker=dict(colors=color_palette))
113
  return fig
114
 
 
116
  def create_gender_ethnicity_distribution_chart(df):
117
  df['GenderOrEthnicity'] = df['Domain'].apply(lambda x: "Gender: Women & LGBTQIA+" if x in ["Women", "LGBTQIA+"] else "Ethnicity")
118
  fig = px.pie(df, names='GenderOrEthnicity', title='Distribution of Gender versus Ethnicity', hole=0.35)
119
+ fig.update_layout(margin=dict(l=20, r=20, t=30, b=20), legend=dict(x=0.1, y=1), font=dict(size=12))
120
  return fig
121
 
122
  # Visualization for Sentiment Distribution Across Domains
123
  def create_sentiment_distribution_chart(df):
124
  domain_counts = df.groupby(['Domain', 'Sentiment']).size().reset_index(name='counts')
125
  domain_counts = domain_counts.sort_values('counts')
126
+
127
+ # color scheme
128
  color_map = {'Negative': 'red', 'Positive': 'blue', 'Neutral': 'lightblue'}
129
+
130
  fig = px.bar(domain_counts, x='Domain', y='counts', color='Sentiment', color_discrete_map=color_map,
131
  title="Sentiment Distribution Across Domains", barmode='stack')
132
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Domain", yaxis_title="Counts", font=dict(size=10))
133
  return fig
134
 
135
+
136
  # Visualization for Correlation between Sentiment and Discrimination
137
  def create_sentiment_discrimination_grouped_chart(df):
138
  # Creating a crosstab of 'Sentiment' and 'Discrimination'
139
  crosstab_df = pd.crosstab(df['Sentiment'], df['Discrimination'])
140
+
141
  # Keep only the 'Discriminative' / 'Non Discriminative' columns that are actually present after the crosstab operation
142
  value_vars = crosstab_df.columns.intersection(['Discriminative', 'Non Discriminative']).tolist()
143
+
144
  # A label that is missing from the crosstab columns is absent from value_vars, so it is simply left out of the melt
145
  melted_df = pd.melt(crosstab_df.reset_index(), id_vars='Sentiment', value_vars=value_vars, var_name='Discrimination', value_name='Count')
146
+
147
  # Proceeding to plot only if we have data to plot
148
  if not melted_df.empty:
149
  fig = px.bar(melted_df, x='Sentiment', y='Count', color='Discrimination', barmode='group', title="Sentiment vs. Discrimination")
 
152
  else:
153
  return "No data to display for the selected filters."
154
 
155
+
156
+
157
  # Function for Top Domains with Negative Sentiment Chart
158
  def create_top_negative_sentiment_domains_chart(df):
159
  domain_counts = df.groupby(['Domain', 'Sentiment']).size().unstack(fill_value=0)
 
182
  def create_key_phrases_positive_sentiment_chart(df):
183
  # Filter the DataFrame for positive sentiments and drop any rows with NaN in 'Content'
184
  positive_df = df[df['Sentiment'] == 'Positive'].dropna(subset=['Content'])
185
+
186
  # Create a CountVectorizer instance
187
  cv = CountVectorizer(ngram_range=(3, 3), stop_words='english')
188
+
189
  # Apply CountVectorizer only on non-null content
190
  trigrams = cv.fit_transform(positive_df['Content'])
191
+
192
  # Sum the frequency of each n-gram and create a DataFrame
193
  count_values = trigrams.toarray().sum(axis=0)
194
  ngram_freq = pd.DataFrame(sorted([(count_values[i], k) for k, i in cv.vocabulary_.items()], reverse=True))
195
  ngram_freq.columns = ['frequency', 'ngram']
196
+
197
  # Create the bar chart
198
  fig = px.bar(ngram_freq.head(10), x='frequency', y='ngram', orientation='h', title='Key phrases in Positive Sentiment Content')
199
+
200
+ # Update layout settings
201
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Frequency", yaxis_title="Trigram", font=dict(size=10))
202
+
203
  return fig
204
+
205
+
206
  # Function for Prevalence of Discriminatory Content Chart
207
  def create_prevalence_discriminatory_content_chart(df):
208
  domain_counts = df.groupby(['Domain', 'Discrimination']).size().unstack(fill_value=0)
 
226
  def create_sentiment_distribution_by_channel_chart(df):
227
  sentiment_by_channel = df.groupby(['Channel', 'Sentiment']).size().reset_index(name='counts')
228
  color_map = {'Positive': 'blue', 'Neutral': 'lightblue', 'Negative': 'red'}
229
+ fig = px.bar(sentiment_by_channel, x='Channel', y='counts', color='Sentiment', title="Sentiment Distribution by Channel", barmode='group', color_discret>
230
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Channel", yaxis_title="Counts", font=dict(size=10), title_x=0.5)
231
  return fig
232
 
 
237
  fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=20, r=20, t=50, b=20), font=dict(size=10), title_x=0.5)
238
  return fig
239
 
240
+
241
  # Function for rendering dashboard
242
  def render_dashboard(page, df_filtered):
243
+ if page == "Dashboard for GESI Conversations in Sri Lanka":
244
  render_prediction_page()
245
  elif page == "GESI Overview":
246
  st.title(" GESI Overview Dashboard")
 
292
 
293
 
294
  # Render the selected dashboard page
295
+ render_dashboard(page, df_filtered)