Kurkur99's picture
Update eda.py
720192c
raw
history blame
No virus
1.22 kB
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
def display_eda(data: pd.DataFrame) -> None:
    """Render exploratory charts for a sentiment dataset in the Streamlit app.

    Two sections are written to the page:

    1. A bar chart of how many rows carry each value of the ``sentiment``
       column.
    2. One word cloud per distinct sentiment, built from the text in the
       ``processed_review`` column of the rows with that sentiment.

    Missing columns and empty inputs are reported on the page with
    ``st.error`` / ``st.warning`` instead of raising.

    Args:
        data: DataFrame expected to contain a ``sentiment`` column and, for
            the word clouds, a ``processed_review`` column.

    Returns:
        None. All output is rendered through Streamlit.
    """
    # Check if 'sentiment' column exists in the dataset
    if 'sentiment' not in data.columns:
        st.error("The dataset does not contain a 'sentiment' column. Please check the data source.")
        return

    # Distribution of sentiments
    st.subheader("Distribution of Sentiments")
    sentiment_counts = data['sentiment'].value_counts()
    if sentiment_counts.empty:
        # Nothing to plot (no rows, or every sentiment is missing); plotting
        # an empty Series would fail, and no word cloud could be built either.
        st.warning("The dataset does not contain any sentiment values to display.")
        return

    fig, ax = plt.subplots()
    sentiment_counts.plot(kind='bar', ax=ax)
    ax.set_title('Distribution of Sentiments')
    ax.set_xlabel('Sentiment')
    ax.set_ylabel('Count')
    st.pyplot(fig)
    # Close explicitly: pyplot keeps every figure alive until closed, so
    # repeated reruns of the app would otherwise accumulate figures.
    plt.close(fig)

    # Word cloud for each sentiment
    st.subheader("Word Clouds for Sentiments")
    if 'processed_review' not in data.columns:
        st.error(
            "The dataset does not contain a 'processed_review' column. "
            "Word clouds cannot be generated."
        )
        return

    # dropna(): a NaN sentiment never compares equal to anything, so its
    # subset would always be empty.
    for sentiment in data['sentiment'].dropna().unique():
        st.write(f"Word Cloud for {sentiment}")
        subset = data[data['sentiment'] == sentiment]
        # Skip missing reviews and coerce the rest to str so that join()
        # cannot fail on NaN or other non-string values.
        text = " ".join(
            str(review) for review in subset['processed_review'].dropna()
        )

        try:
            wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
        except ValueError:
            # WordCloud raises ValueError when the text yields no words
            # (empty text, or nothing left after stop-word removal).
            st.warning(f"Not enough text to build a word cloud for {sentiment}.")
            continue

        # Use an explicit figure rather than pyplot's global "current figure":
        # calling st.pyplot() without arguments is deprecated by Streamlit.
        cloud_fig, cloud_ax = plt.subplots()
        cloud_ax.imshow(wordcloud, interpolation="bilinear")
        cloud_ax.axis("off")
        st.pyplot(cloud_fig)
        plt.close(cloud_fig)