Spaces:
Runtime error
Runtime error
Update eda.py
Browse files
eda.py
CHANGED
@@ -21,14 +21,6 @@ def process_review(review):
|
|
21 |
review = re.sub(r'[^a-z\s]', '', review) # Remove non-alphabetical characters
|
22 |
return review
|
23 |
|
24 |
-
def find_review_column(data):
|
25 |
-
"""Attempt to identify the review column from the dataset."""
|
26 |
-
potential_columns = ['review', 'text', 'comment', 'message', 'description']
|
27 |
-
for col in potential_columns:
|
28 |
-
if col in data.columns:
|
29 |
-
return col
|
30 |
-
return None
|
31 |
-
|
32 |
def display_eda(data):
|
33 |
# Derive the 'sentiment' column from 'rating' if it doesn't exist
|
34 |
if 'sentiment' not in data.columns:
|
@@ -50,16 +42,11 @@ def display_eda(data):
|
|
50 |
|
51 |
# Word cloud for each sentiment
|
52 |
st.subheader("Word Clouds for Sentiments")
|
53 |
-
review_column = find_review_column(data)
|
54 |
-
if not review_column:
|
55 |
-
st.error("Couldn't find a column with reviews. Please check the dataset.")
|
56 |
-
return
|
57 |
-
|
58 |
sentiments = data['sentiment'].unique()
|
59 |
for sentiment in sentiments:
|
60 |
st.write(f"Word Cloud for {sentiment}")
|
61 |
subset = data[data['sentiment'] == sentiment]
|
62 |
-
text = " ".join(process_review(review) for review in subset[review_column])
|
63 |
wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
|
64 |
plt.figure()
|
65 |
plt.imshow(wordcloud, interpolation="bilinear")
|
|
|
21 |
review = re.sub(r'[^a-z\s]', '', review) # Remove non-alphabetical characters
|
22 |
return review
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
def display_eda(data):
|
25 |
# Derive the 'sentiment' column from 'rating' if it doesn't exist
|
26 |
if 'sentiment' not in data.columns:
|
|
|
42 |
|
43 |
# Word cloud for each sentiment
|
44 |
st.subheader("Word Clouds for Sentiments")
|
|
|
|
|
|
|
|
|
|
|
45 |
sentiments = data['sentiment'].unique()
|
46 |
for sentiment in sentiments:
|
47 |
st.write(f"Word Cloud for {sentiment}")
|
48 |
subset = data[data['sentiment'] == sentiment]
|
49 |
+
text = " ".join(process_review(review) for review in subset['review_description'])
|
50 |
wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
|
51 |
plt.figure()
|
52 |
plt.imshow(wordcloud, interpolation="bilinear")
|