Spaces:

tuanpasg
/

absa_hcmus

Sleeping

File size: 2,533 Bytes

a9200d2
 
882e0e7
a9200d2
 
 
882e0e7
1b95adc
a9200d2
1b95adc
 
a9200d2
 
 
 
 
 
22d2130
a9200d2
 
 
1b95adc
 
a9200d2
 
 
 
1b95adc
a9200d2
 
 
1b95adc
a9200d2
1b95adc
 
 
c5ccd6f
a9200d2
 
1b95adc
 
 
 
 
 
 
 
 
a9200d2

import pandas as pd
import json
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud

# Columns every uploaded record must provide: "A" (aspect term) and
# "S" (sentiment label: the report counts 'great', 'ok' and 'bad').
REQUIRED_COLUMNS = ("A", "S")


def load_sentiment_frame(uploaded_file):
    """Parse an uploaded JSON file into a DataFrame of sentiment tuples.

    Args:
        uploaded_file: A readable file-like object (text or bytes) holding
            JSON that ``pandas.DataFrame`` can consume.

    Returns:
        A DataFrame that contains at least the columns in REQUIRED_COLUMNS.

    Raises:
        ValueError: If the content is not valid JSON, cannot be turned into
            a table, or lacks one of the required columns.
    """
    try:
        records = json.load(uploaded_file)
    except (json.JSONDecodeError, UnicodeDecodeError) as err:
        raise ValueError(f"not valid JSON ({err})") from err

    try:
        df = pd.DataFrame(records)
    except (ValueError, TypeError) as err:
        raise ValueError(f"JSON content is not tabular ({err})") from err

    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise ValueError(f"missing required column(s): {', '.join(missing)}")
    return df


def summarize_aspects(df):
    """Count mentions and per-sentiment totals for every aspect.

    Args:
        df: DataFrame with an aspect column "A" and a sentiment column "S".

    Returns:
        A DataFrame with columns Aspect, Freq, Great, Ok, Bad, one row per
        distinct aspect (sorted by aspect). Freq counts non-null sentiments.
    """
    sentiments = df["S"]
    # Indicator columns summed per aspect replace anonymous lambdas, whose
    # auto-generated column names ('<lambda_0>', ...) vary by pandas version.
    indicators = pd.DataFrame(
        {
            "Freq": sentiments.notna().astype(int),
            "Great": (sentiments == "great").astype(int),
            "Ok": (sentiments == "ok").astype(int),
            "Bad": (sentiments == "bad").astype(int),
        }
    )
    summary = indicators.groupby(df["A"]).sum().reset_index()
    return summary.rename(columns={"A": "Aspect"})


def build_aspect_text(df):
    """Join all aspect terms into one space-separated string.

    Null aspects are skipped and non-string values are converted with
    ``str`` so that the join cannot fail on mixed input.
    """
    return " ".join(df["A"].dropna().astype(str))


def render_sentiment_distribution(df, palette):
    """Draw the sentiment pie chart (percent) and bar chart (counts)."""
    counts = df["S"].value_counts()
    st.subheader("Sentiment Distribution")
    if counts.empty:
        st.info("No sentiment labels found to chart.")
        return
    percentages = df["S"].value_counts(normalize=True) * 100

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 6))
    ax1.pie(
        percentages,
        labels=percentages.index,
        autopct='%1.1f%%',
        startangle=140,
        colors=palette,
    )
    ax1.axis('equal')
    ax1.set_title("Sentiment Distribution %")

    ax2.set_title("Sentiment Distribution Counts")
    ax2.bar(counts.index, counts.values, color=palette)

    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; close the figure so
    # matplotlib does not accumulate open figures across reruns.
    plt.close(fig)


def render_word_cloud(df):
    """Draw a word cloud built from the aspect terms."""
    st.subheader("Word Cloud for Most Mentioned Aspects")
    aspect_terms = build_aspect_text(df)
    if not aspect_terms.strip():
        st.info("No aspect terms available for a word cloud.")
        return

    try:
        wordcloud = WordCloud(
            width=800,
            height=400,
            background_color='white',
            max_words=100,
            colormap='inferno',
            contour_width=3,
            contour_color='red',
        ).generate(aspect_terms)
    except ValueError:
        # WordCloud raises ValueError when no plottable word remains
        # (for example, when every term is filtered out as a stopword).
        st.info("No aspect terms available for a word cloud.")
        return

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.set_title("Most mentioned aspect terms")
    ax.axis("off")
    # Pass the figure explicitly instead of relying on the deprecated
    # global-figure form of st.pyplot().
    st.pyplot(fig)
    plt.close(fig)


def render_dashboard(df):
    """Render the full report for a loaded DataFrame of sentiment tuples."""
    st.subheader(f"{len(df)}+ sentiment tuples was detected")
    st.write(df)
    if df.empty:
        st.warning("The uploaded file contains no sentiment tuples to chart.")
        return

    render_sentiment_distribution(df, sns.color_palette('Set1'))

    st.subheader("Top 5 Most Mentioned Product Aspects")
    st.write(summarize_aspects(df).sort_values(by="Freq", ascending=False).head(5))

    render_word_cloud(df)


def main():
    """Define the Streamlit app: upload a JSON file and show the report."""
    st.title("Aspect-Based Sentiment Analysis with MVP")

    # File upload and processing
    uploaded_file = st.file_uploader("Upload JSON File", type=["json"])
    if uploaded_file is not None:
        try:
            df = load_sentiment_frame(uploaded_file)
        except ValueError as err:
            st.error(f"Could not read the uploaded file: {err}")
        else:
            render_dashboard(df)

    st.sidebar.markdown("**Upload a JSON file to get started.**")


# Streamlit executes the script as "__main__", so the app still runs under
# `streamlit run`, while importing this module has no side effects.
if __name__ == "__main__":
    main()