Kurkur99 committed on
Commit
0bd7d7b
1 Parent(s): a0e0a21

Update eda.py

Browse files
Files changed (1) hide show
  1. eda.py +1 -14
eda.py CHANGED
@@ -21,14 +21,6 @@ def process_review(review):
21
  review = re.sub(r'[^a-z\s]', '', review) # Remove non-alphabetical characters
22
  return review
23
 
24
- def find_review_column(data):
25
- """Attempt to identify the review column from the dataset."""
26
- potential_columns = ['review', 'text', 'comment', 'message', 'description']
27
- for col in potential_columns:
28
- if col in data.columns:
29
- return col
30
- return None
31
-
32
  def display_eda(data):
33
  # Derive the 'sentiment' column from 'rating' if it doesn't exist
34
  if 'sentiment' not in data.columns:
@@ -50,16 +42,11 @@ def display_eda(data):
50
 
51
  # Word cloud for each sentiment
52
  st.subheader("Word Clouds for Sentiments")
53
- review_column = find_review_column(data)
54
- if not review_column:
55
- st.error("Couldn't find a column with reviews. Please check the dataset.")
56
- return
57
-
58
  sentiments = data['sentiment'].unique()
59
  for sentiment in sentiments:
60
  st.write(f"Word Cloud for {sentiment}")
61
  subset = data[data['sentiment'] == sentiment]
62
- text = " ".join(process_review(review) for review in subset[review_column])
63
  wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
64
  plt.figure()
65
  plt.imshow(wordcloud, interpolation="bilinear")
 
21
  review = re.sub(r'[^a-z\s]', '', review) # Remove non-alphabetical characters
22
  return review
23
 
 
 
 
 
 
 
 
 
24
  def display_eda(data):
25
  # Derive the 'sentiment' column from 'rating' if it doesn't exist
26
  if 'sentiment' not in data.columns:
 
42
 
43
  # Word cloud for each sentiment
44
  st.subheader("Word Clouds for Sentiments")
 
 
 
 
 
45
  sentiments = data['sentiment'].unique()
46
  for sentiment in sentiments:
47
  st.write(f"Word Cloud for {sentiment}")
48
  subset = data[data['sentiment'] == sentiment]
49
+ text = " ".join(process_review(review) for review in subset['review_description'])
50
  wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
51
  plt.figure()
52
  plt.imshow(wordcloud, interpolation="bilinear")