Spaces:

menikev
/

TestApp

Sleeping

App Files Files Community

menikev committed on Jun 7, 2024

Commit

863daa6

verified ·

1 Parent(s): f707acd

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -77

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -37,7 +38,7 @@ df = load_and_clean_data()
 # Page navigation setup
-page_names = ["Analytics Dashboard for Domain Predictions", "GESI Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"]
 page = st.sidebar.selectbox("Choose a page", page_names)
 # Sidebar Filters
@@ -51,7 +52,6 @@ channel_filter = st.sidebar.multiselect('Select Channel', options=channel_option
 sentiment_filter = st.sidebar.multiselect('Select Sentiment', options=sentiment_options, default=sentiment_options)
 discrimination_filter = st.sidebar.multiselect('Select Discrimination', options=discrimination_options, default=discrimination_options)
 # Apply filters
 df_filtered = df[(df['Domain'].isin(domain_filter)) &
                  (df['Channel'].isin(channel_filter)) &
@@ -63,67 +63,52 @@ color_palette = px.colors.sequential.Viridis
 # Function to render the model prediction visualization page
 def render_prediction_page():
-    """Render the text-analysis page: collect input, run the pipeline, show results.
-
-    Shows a title and introduction, reads free text from a Streamlit text
-    area and, once the button is pressed, passes the text to ``run_pipeline``.
-    The domain and discrimination labels are shown with progress bars, and
-    both scores are drawn as Plotly gauge indicators.
-
-    NOTE(review): ``run_pipeline`` is defined outside this hunk; its result is
-    presumably dict-like because ``.get`` is called on it -- confirm.
-    """
-    st.title("Streamlit Analytics Dashboard for Model Predictions")
-    st.write("""
-    Welcome to the interactive analytics dashboard that brings to life the nuanced assessment of textual content.
-    Dive into the insightful world of language processing where each sentence you enter is meticulously evaluated
-    for its domain relevance and sentiment connotation.
-    Instant Analysis: Enter any text snippet and get immediate predictions with our sophisticated model that assesses content with nuanced precision.
-    Domain Identification: Discover the domain categorization of your text, providing clarity on the subject matter with a quantifiable domain score.
-    """)
-    # User input text area
-    user_input = st.text_area("Enter Text/Content here to analyze", height=150)
-    if st.button("Perform Contextual Analysis"):
-        # Use run_pipeline to get predictions
-        prediction = run_pipeline(user_input)
-        # Extract prediction details
-        # Missing keys fall back to "Unknown" / 0 so the page still renders.
-        domain_label = prediction.get("domain_label", "Unknown")
-        domain_score = prediction.get("domain_score", 0)
-        discrimination_label = prediction.get("discrimination_label", "Unknown")
-        discrimination_score = prediction.get("discrimination_score", 0)
-        # Visualization layout
-        # First row: label plus progress bar for each prediction.
-        # NOTE(review): scores are presumably in [0, 1] (the gauges below use
-        # an axis ending at 1) -- confirm that st.progress accepts them.
-        col1, col2 = st.columns(2)
-        with col1:
-            st.markdown("#### Domain Label")
-            st.markdown(f"## {domain_label}")
-            st.progress(domain_score)
-        with col2:
-            st.markdown("#### Discrimination Label")
-            st.markdown(f"## {discrimination_label}")
-            st.progress(discrimination_score)
-        # Second row: one gauge indicator per score.
-        col3, col4 = st.columns(2)
-        with col3:
-            # Domain Score Gauge
-            fig_domain = go.Figure(go.Indicator(
-                mode="gauge+number",
-                value=domain_score,
-                domain={'x': [0, 1], 'y': [0, 1]},
-                title={'text': "Domain Score"},
-                gauge={'axis': {'range': [None, 1]}}))
-            st.plotly_chart(fig_domain, use_container_width=True)
-        with col4:
-            # Discrimination Score Gauge
-            fig_discrimination = go.Figure(go.Indicator(
-                mode="gauge+number",
-                value=discrimination_score,
-                domain={'x': [0, 1], 'y': [0, 1]},
-                title={'text': "Discrimination Score"},
-                gauge={'axis': {'range': [None, 1]}}))
-            st.plotly_chart(fig_discrimination, use_container_width=True)
 # Visualisation for Domain Distribution
 def create_pie_chart(df, column, title):
-    """Return a donut chart (hole=0.35) of the values in ``df[column]``.
-
-    ``title`` becomes the chart title. Slice colours come from the
-    module-level ``color_palette``.
-    """
     fig = px.pie(df, names=column, title=title, hole=0.35)
-    fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), legend=dict(x=0.1, y=1), font=dict(size=10))
     fig.update_traces(marker=dict(colors=color_palette))
     return fig
@@ -131,33 +116,34 @@ def create_pie_chart(df, column, title):
 def create_gender_ethnicity_distribution_chart(df):
-    """Return a donut chart splitting rows into gender versus ethnicity domains.
-
-    Rows whose ``Domain`` is "Women" or "LGBTQIA+" are labelled as gender;
-    every other row is labelled "Ethnicity".
-
-    NOTE(review): the label column is written into the ``df`` argument itself,
-    so the caller's frame gains a ``GenderOrEthnicity`` column.
-    """
     df['GenderOrEthnicity'] = df['Domain'].apply(lambda x: "Gender: Women & LGBTQIA+" if x in ["Women", "LGBTQIA+"] else "Ethnicity")
     fig = px.pie(df, names='GenderOrEthnicity', title='Distribution of Gender versus Ethnicity', hole=0.35)
-    fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), legend=dict(x=0.1, y=1), font=dict(size=10))
     return fig
 # Visualization for Sentiment Distribution Across Domains
 def create_sentiment_distribution_chart(df):
-    """Return a stacked bar chart of row counts per domain, split by sentiment.
-
-    Rows are counted per (``Domain``, ``Sentiment``) pair and sorted by
-    ascending count before plotting.
-    """
     domain_counts = df.groupby(['Domain', 'Sentiment']).size().reset_index(name='counts')
     domain_counts = domain_counts.sort_values('counts')
-    # Reverse the color scheme
     color_map = {'Negative': 'red', 'Positive': 'blue', 'Neutral': 'lightblue'}
     fig = px.bar(domain_counts, x='Domain', y='counts', color='Sentiment', color_discrete_map=color_map,
                  title="Sentiment Distribution Across Domains", barmode='stack')
     fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Domain", yaxis_title="Counts", font=dict(size=10))
     return fig
 # Visualization for Correlation between Sentiment and Discrimination
 def create_sentiment_discrimination_grouped_chart(df):
     # Creating a crosstab of 'Sentiment' and 'Discrimination'
     crosstab_df = pd.crosstab(df['Sentiment'], df['Discrimination'])
     # Check if 'Yes' and 'No' are in the columns after the crosstab operation
     value_vars = crosstab_df.columns.intersection(['Discriminative', 'Non Discriminative']).tolist()
     # If 'No' is not in columns, it will not be included in melting
     melted_df = pd.melt(crosstab_df.reset_index(), id_vars='Sentiment', value_vars=value_vars, var_name='Discrimination', value_name='Count')
     # Proceeding to plot only if we have data to plot
     if not melted_df.empty:
         fig = px.bar(melted_df, x='Sentiment', y='Count', color='Discrimination', barmode='group', title="Sentiment vs. Discrimination")
@@ -166,6 +152,8 @@ def create_sentiment_discrimination_grouped_chart(df):
     else:
         return "No data to display for the selected filters."
 # Function for Top Domains with Negative Sentiment Chart
 def create_top_negative_sentiment_domains_chart(df):
     domain_counts = df.groupby(['Domain', 'Sentiment']).size().unstack(fill_value=0)
@@ -194,26 +182,27 @@ def create_key_phrases_negative_sentiment_chart(df):
 def create_key_phrases_positive_sentiment_chart(df):
-    """Return a horizontal bar chart of the ten most frequent trigrams in positive content.
-
-    Only rows with ``Sentiment == 'Positive'`` and a non-null ``Content`` are
-    used. Trigrams are counted with a ``CountVectorizer`` using English stop
-    words.
-
-    NOTE(review): behaviour when no positive rows remain after filtering is
-    not handled here; presumably the vectorizer or the column assignment
-    raises -- confirm.
-    """
     # Filter the DataFrame for positive sentiments and drop any rows with NaN in 'Content'
     positive_df = df[df['Sentiment'] == 'Positive'].dropna(subset=['Content'])
     # Create a CountVectorizer instance
     cv = CountVectorizer(ngram_range=(3, 3), stop_words='english')
     # Apply CountVectorizer only on non-null content
     trigrams = cv.fit_transform(positive_df['Content'])
     # Sum the frequency of each n-gram and create a DataFrame
     count_values = trigrams.toarray().sum(axis=0)
     ngram_freq = pd.DataFrame(sorted([(count_values[i], k) for k, i in cv.vocabulary_.items()], reverse=True))
     ngram_freq.columns = ['frequency', 'ngram']
     # Create the bar chart
     fig = px.bar(ngram_freq.head(10), x='frequency', y='ngram', orientation='h', title='Key phrases in Positive Sentiment Content')
-    # Update layout settings to fit and look better
     fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Frequency", yaxis_title="Trigram", font=dict(size=10))
     return fig
 # Function for Prevalence of Discriminatory Content Chart
 def create_prevalence_discriminatory_content_chart(df):
     domain_counts = df.groupby(['Domain', 'Discrimination']).size().unstack(fill_value=0)
@@ -237,7 +226,7 @@ def create_top_discriminatory_domains_chart(df):
 def create_sentiment_distribution_by_channel_chart(df):
-    """Return a grouped bar chart of row counts per channel, split by sentiment.
-
-    Rows are counted per (``Channel``, ``Sentiment``) pair and each sentiment
-    is drawn in a fixed colour.
-    """
     sentiment_by_channel = df.groupby(['Channel', 'Sentiment']).size().reset_index(name='counts')
     color_map = {'Positive': 'blue', 'Neutral': 'lightblue', 'Negative': 'red'}
-    fig = px.bar(sentiment_by_channel, x='Channel', y='counts', color='Sentiment', title="Sentiment Distribution by Channel", barmode='group', color_discrete_map=color_map)
     fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Channel", yaxis_title="Counts", font=dict(size=10), title_x=0.5)
     return fig
@@ -248,9 +237,10 @@ def create_channel_discrimination_chart(df):
     fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=20, r=20, t=50, b=20), font=dict(size=10), title_x=0.5)
     return fig
 # Function for rendering dashboard
 def render_dashboard(page, df_filtered):
-    if page == "Analytics Dashboard for Domain Predictions":
         render_prediction_page()
     elif page == "GESI Overview":
         st.title(" GESI Overview Dashboard")
@@ -302,4 +292,4 @@ def render_dashboard(page, df_filtered):
 # Render the selected dashboard page
-render_dashboard(page, df_filtered)

+import torch
 import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
 # Page navigation setup
+page_names = ["Dashboard for GESI Conversation in Sri Lanka", "GESI Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"]
 page = st.sidebar.selectbox("Choose a page", page_names)
 # Sidebar Filters
 sentiment_filter = st.sidebar.multiselect('Select Sentiment', options=sentiment_options, default=sentiment_options)
 discrimination_filter = st.sidebar.multiselect('Select Discrimination', options=discrimination_options, default=discrimination_options)
 # Apply filters
 df_filtered = df[(df['Domain'].isin(domain_filter)) &
                  (df['Channel'].isin(channel_filter)) &
 # Function to render the model prediction visualization page
 def render_prediction_page():
+  st.title("Dashboard for GESI Conversations in Sri Lanka")
+  st.write("""
+  Instant Analysis: Enter any text snippet and get immediate predictions from out model train on English, Sinhala and Tamil based languages \n\n
+  Domain Identification: Discover the subject matter of your text with a quantifiable domain score. """)
+  # User input text area
+  user_input = st.text_are("Enter Text/Content here to analyze", height=150)
+  if st.button("Perfrom contextual Analysis"):
+    # Use run_pipeline to get predictions
+    predictions = run_pipeline(user_input)
+    # Extract prediction details
+    domain_label = prediction.get("domain_label", "Unknown")
+    domain_score = prediction.get("domain_socre", 0)
+    discrimination_label = prediction.get("discrimination_label", "Unknown")
+    discrimination_score = prediction.get("discrimination_score", 0)
+    # Visualization layout
+    col1, col2 = st.columns(2)
+    with col1:
+      st.markdown("#### Domain Label")
+      st.markdown(f"## {domain_label}")
+      st.progress(domain_score)
+    with col2:
+      st.makrdown("#### Discrimination Label")
+      st.markdown(f"## {discrimination_label}")
+      st.progress(domain_score)
+    col3, col4 = st.columns(2)
+    with col3:
+      # Display Domain Score in Bold
+      st.markdown(f'**Domain Score: {domain_score:.2f}**', unsafe_allow_html=True)
+    with col4:
+      # Display Discrimination Score in Bold
+      st.markdown(f'**Discrimination Score: {discrimination_score:.2f}**', unsafe_allow_html=True)
 # Visualisation for Domain Distribution
 def create_pie_chart(df, column, title):
+    """Return a donut chart (hole=0.35) of the values in ``df[column]``.
+
+    ``title`` becomes the chart title. Slice colours come from the
+    module-level ``color_palette``.
+    """
     fig = px.pie(df, names=column, title=title, hole=0.35)
+    fig.update_layout(margin=dict(l=20, r=20, t=30, b=20), legend=dict(x=0.1, y=1), font=dict(size=12))
     fig.update_traces(marker=dict(colors=color_palette))
     return fig
 def create_gender_ethnicity_distribution_chart(df):
     df['GenderOrEthnicity'] = df['Domain'].apply(lambda x: "Gender: Women & LGBTQIA+" if x in ["Women", "LGBTQIA+"] else "Ethnicity")
     fig = px.pie(df, names='GenderOrEthnicity', title='Distribution of Gender versus Ethnicity', hole=0.35)
+    fig.update_layout(margin=dict(l=20, r=20, t=30, b=20), legend=dict(x=0.1, y=1), font=dict(size=12))
     return fig
 # Visualization for Sentiment Distribution Across Domains
 def create_sentiment_distribution_chart(df):
+    """Return a stacked bar chart of row counts per domain, split by sentiment.
+
+    Rows are counted per (``Domain``, ``Sentiment``) pair and sorted by
+    ascending count before plotting.
+    """
     domain_counts = df.groupby(['Domain', 'Sentiment']).size().reset_index(name='counts')
     domain_counts = domain_counts.sort_values('counts')
+    # Fixed colour per sentiment value
     color_map = {'Negative': 'red', 'Positive': 'blue', 'Neutral': 'lightblue'}
     fig = px.bar(domain_counts, x='Domain', y='counts', color='Sentiment', color_discrete_map=color_map,
                  title="Sentiment Distribution Across Domains", barmode='stack')
     fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Domain", yaxis_title="Counts", font=dict(size=10))
     return fig
 # Visualization for Correlation between Sentiment and Discrimination
 def create_sentiment_discrimination_grouped_chart(df):
     # Creating a crosstab of 'Sentiment' and 'Discrimination'
     crosstab_df = pd.crosstab(df['Sentiment'], df['Discrimination'])
     # Check if 'Yes' and 'No' are in the columns after the crosstab operation
     value_vars = crosstab_df.columns.intersection(['Discriminative', 'Non Discriminative']).tolist()
     # If 'No' is not in columns, it will not be included in melting
     melted_df = pd.melt(crosstab_df.reset_index(), id_vars='Sentiment', value_vars=value_vars, var_name='Discrimination', value_name='Count')
     # Proceeding to plot only if we have data to plot
     if not melted_df.empty:
         fig = px.bar(melted_df, x='Sentiment', y='Count', color='Discrimination', barmode='group', title="Sentiment vs. Discrimination")
     else:
         return "No data to display for the selected filters."
 # Function for Top Domains with Negative Sentiment Chart
 def create_top_negative_sentiment_domains_chart(df):
     domain_counts = df.groupby(['Domain', 'Sentiment']).size().unstack(fill_value=0)
 def create_key_phrases_positive_sentiment_chart(df):
+    """Return a horizontal bar chart of the ten most frequent trigrams in positive content.
+
+    Only rows with ``Sentiment == 'Positive'`` and a non-null ``Content`` are
+    used. Trigrams are counted with a ``CountVectorizer`` using English stop
+    words.
+
+    NOTE(review): behaviour when no positive rows remain after filtering is
+    not handled here; presumably the vectorizer or the column assignment
+    raises -- confirm.
+    """
     # Filter the DataFrame for positive sentiments and drop any rows with NaN in 'Content'
     positive_df = df[df['Sentiment'] == 'Positive'].dropna(subset=['Content'])
     # Create a CountVectorizer instance
     cv = CountVectorizer(ngram_range=(3, 3), stop_words='english')
     # Apply CountVectorizer only on non-null content
     trigrams = cv.fit_transform(positive_df['Content'])
     # Sum the frequency of each n-gram and create a DataFrame
     count_values = trigrams.toarray().sum(axis=0)
     ngram_freq = pd.DataFrame(sorted([(count_values[i], k) for k, i in cv.vocabulary_.items()], reverse=True))
     ngram_freq.columns = ['frequency', 'ngram']
     # Create the bar chart
     fig = px.bar(ngram_freq.head(10), x='frequency', y='ngram', orientation='h', title='Key phrases in Positive Sentiment Content')
+    # Layout: margins, axis titles and font size
     fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Frequency", yaxis_title="Trigram", font=dict(size=10))
     return fig
 # Function for Prevalence of Discriminatory Content Chart
 def create_prevalence_discriminatory_content_chart(df):
     domain_counts = df.groupby(['Domain', 'Discrimination']).size().unstack(fill_value=0)
 def create_sentiment_distribution_by_channel_chart(df):
     sentiment_by_channel = df.groupby(['Channel', 'Sentiment']).size().reset_index(name='counts')
     color_map = {'Positive': 'blue', 'Neutral': 'lightblue', 'Negative': 'red'}
+    fig = px.bar(sentiment_by_channel, x='Channel', y='counts', color='Sentiment', title="Sentiment Distribution by Channel", barmode='group', color_discret>
     fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Channel", yaxis_title="Counts", font=dict(size=10), title_x=0.5)
     return fig
     fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=20, r=20, t=50, b=20), font=dict(size=10), title_x=0.5)
     return fig
 # Function for rendering dashboard
 def render_dashboard(page, df_filtered):
+    if page == "Dashboard for GESI Conversations in Sri Lanka":
         render_prediction_page()
     elif page == "GESI Overview":
         st.title(" GESI Overview Dashboard")
 # Render the selected dashboard page
+render_dashboard(page, df_filtered)