RohitBh committed on
Commit
34c54ab
1 Parent(s): 9e01c0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -366
app.py CHANGED
@@ -15,8 +15,7 @@ hf_sentiment_analyzer = pipeline(
15
 
16
  def analyze_hf_sentiment(text):
17
  if len(text) > 512:
18
- temp = text[:511]
19
- text = temp
20
  result = hf_sentiment_analyzer(text)
21
  label = result[0]["label"]
22
  if label == "LABEL_1":
@@ -65,14 +64,6 @@ def display_pie_chart(df, column):
65
  ax.axis("equal")
66
  st.pyplot(fig)
67
 
68
- # Add a download button
69
- if st.button('Download Pie Chart'):
70
- # Save the pie chart as an image file
71
- plt.savefig('pie_chart.png')
72
-
73
- # Offer the image file for download
74
- st.download_button(label='Download Pie Chart Image', data=open('pie_chart.png', 'rb').read(), file_name='pie_chart.png', mime='image/png')
75
-
76
  # Function to display word cloud
77
  def display_wordcloud(text_data):
78
  wordcloud = WordCloud(width=800, height=400, background_color="white").generate(
@@ -83,14 +74,6 @@ def display_wordcloud(text_data):
83
  ax.axis("off")
84
  st.pyplot(fig)
85
 
86
- # Add a download button
87
- if st.button('Download Word Cloud'):
88
- # Save the word cloud as an image file
89
- plt.savefig('word_cloud.png')
90
-
91
- # Offer the image file for download
92
- st.download_button(label='Download Word Cloud Image', data=open('word_cloud.png', 'rb').read(), file_name='word_cloud.png', mime='image/png')
93
-
94
  # Function to download CSV file
95
  def download_csv(df):
96
  csv = df.to_csv(index=False)
@@ -98,366 +81,55 @@ def download_csv(df):
98
  href = f'<a href="data:file/csv;base64,{b64}" download="sentiment_analysis_results.csv">Download CSV File</a>'
99
  st.markdown(href, unsafe_allow_html=True)
100
 
101
- # Function to count occurrences of keywords and sentiment distribution
102
- def count_reviews_with_keywords(df,keywords):
103
- # keywords=['logistics', 'supply chain', 'cargo', 'shipment', 'freight', 'package', 'tracking']
104
- keyword_counts = {keyword: {"Positive": 0, "Negative": 0, "Total": 0} for keyword in keywords}
105
-
106
- for _, row in df.iterrows():
107
- review_text = row["review_text"]
108
- sentiment = row["Sentiment"]
109
-
110
- for keyword in keywords:
111
- if keyword.lower() in review_text.lower():
112
- keyword_counts[keyword]["Total"] += 1
113
- if sentiment == "Positive":
114
- keyword_counts[keyword]["Positive"] += 1
115
- elif sentiment == "Negative":
116
- keyword_counts[keyword]["Negative"] += 1
117
-
118
- return keyword_counts
119
-
120
-
121
  # Streamlit UI
122
- st.set_page_config(page_title="SentimentAnalysis App", page_icon=":smiley:")
123
- st.title("SentimentAnalysis App")
124
 
125
  # Sidebar
126
  st.sidebar.title("Options")
127
- input_option = st.sidebar.radio("Select Input Option", ("Free Text", "CSV Files"))
128
- selected_model = st.sidebar.radio(
129
- "Select Sentiment Analysis Model", ("VADER", "TextBlob", "Hugging Face")
130
  )
131
- result_option = st.sidebar.radio(
132
  "Select Result Display Option",
133
- ("DataFrame", "Pie Chart", "Bar Chart", "Keyword Frequency", "WordCloud", "Comparative Sentiment Analysis"),
134
  )
135
 
136
  # Main content
137
- progress_label = st.empty() # Define progress label
138
- progress_bar = st.progress(0)
139
- progress = 0
140
-
141
- # Directory path to store processed files
142
- processed_directory = "processed_files"
143
-
144
- # Ensure the directory exists, if not create it
145
- os.makedirs(processed_directory, exist_ok=True)
146
-
147
- # List to store processed filenames
148
- processed_files = []
149
-
150
- # Function to get filenames from the processed directory
151
- def get_processed_filenames():
152
- return [
153
- f
154
- for f in os.listdir(processed_directory)
155
- if os.path.isfile(os.path.join(processed_directory, f))
156
- ]
157
-
158
  if input_option == "Free Text":
159
  st.subheader("Enter review for sentiment analysis:")
160
- user_input = st.text_area("", "")
161
- if not user_input:
162
- st.info("Enter some text above for sentiment analysis.")
163
- else:
164
- with st.spinner("Analyzing..."):
165
- if selected_model == "Hugging Face":
166
- result = analyze_hf_sentiment(user_input)
167
- elif selected_model == "VADER":
168
- result = analyze_vader_sentiment(user_input)
169
- elif selected_model == "TextBlob":
170
- result = analyze_textblob_sentiment(user_input)
171
- st.write("Sentiment:", result)
 
172
 
173
  if input_option == "CSV Files":
174
- st.subheader("Select CSV files for sentiment analysis:")
175
-
176
- # Uploading new file
177
- files = st.file_uploader(
178
- "Upload New File", type=["csv"], accept_multiple_files=True
179
- )
180
- if files:
181
- # Process uploaded new files
182
- for file in files:
183
- if file.type != "text/csv":
184
- st.warning(
185
- "Uploaded file is not a CSV file. Please upload a CSV file only."
186
- )
187
- else:
188
- df = pd.read_csv(file)
189
- if "review_text" not in df.columns:
190
- st.warning(
191
- "Uploaded CSV file doesn't contain 'review_text' column. Please check the CSV file format."
192
- )
193
  else:
194
- total_rows = len(df)
195
-
196
- sentiments_v = []
197
- sentiments_tb = []
198
- sentiments_hf = []
199
-
200
- for review_text in df["review_text"]:
201
- sentiments_v.append(analyze_vader_sentiment(review_text))
202
- sentiments_tb.append(analyze_textblob_sentiment(review_text))
203
- sentiments_hf.append(analyze_hf_sentiment(review_text))
204
- progress += 1
205
- progress_label.text(f"{progress}/{total_rows}")
206
- progress_bar.progress(min(progress / total_rows, 1.0))
207
-
208
- df["VADER Sentiment"] = sentiments_v
209
- df["TextBlob Sentiment"] = sentiments_tb
210
- df["HuggingFace Sentiment"] = sentiments_hf
211
-
212
- # Save processed file with modified filename
213
- new_filename = os.path.splitext(file.name)[0] + "1.csv"
214
- df.to_csv(
215
- os.path.join(processed_directory, new_filename), index=False
216
- )
217
- st.success(f"New file processed and saved as {new_filename}")
218
-
219
- # List of already processed files
220
- processed_files = get_processed_filenames()
221
- selected_files = st.multiselect("Select from Processed Files", processed_files)
222
-
223
- if not files and not selected_files:
224
- st.info(
225
- "Upload a new CSV file or select from processed files above for sentiment analysis."
226
- )
227
-
228
- all_dfs = []
229
-
230
- # Process already selected files
231
- for file_name in selected_files:
232
- df = pd.read_csv(os.path.join(processed_directory, file_name))
233
- all_dfs.append(df)
234
-
235
- # Results
236
- if all_dfs:
237
- combined_df = pd.concat(all_dfs, ignore_index=True)
238
- if selected_model == "TextBlob":
239
- result = "TextBlob Sentiment"
240
- combined_df.drop(
241
- columns=["VADER Sentiment", "HuggingFace Sentiment"],
242
- inplace=True,
243
- )
244
- elif selected_model == "VADER":
245
- result = "VADER Sentiment"
246
- combined_df.drop(
247
- columns=["TextBlob Sentiment", "HuggingFace Sentiment"],
248
- inplace=True,
249
- )
250
- elif selected_model == "Hugging Face":
251
- result = "HuggingFace Sentiment"
252
- combined_df.drop(
253
- columns=["TextBlob Sentiment", "VADER Sentiment"],
254
- inplace=True,
255
- )
256
- combined_df.rename(columns={result: "Sentiment"}, inplace=True)
257
-
258
- if result_option == "DataFrame":
259
- st.subheader("Sentiment Analysis Results")
260
- display_dataframe(combined_df)
261
- download_csv(combined_df)
262
- elif result_option == "Pie Chart":
263
- st.subheader("Sentiment Distribution")
264
- display_pie_chart(combined_df, "Sentiment")
265
- elif result_option == "Bar Chart":
266
- # Calculate value counts
267
- sentiment_counts = combined_df["Sentiment"].value_counts()
268
- # Display bar chart
269
- st.bar_chart(sentiment_counts)
270
-
271
- # Add a download button
272
- if st.button('Download Sentiment Counts Chart'):
273
- # Plot the bar chart
274
- fig, ax = plt.subplots()
275
- sentiment_counts.plot(kind='bar', ax=ax)
276
- plt.xlabel('Sentiment')
277
- plt.ylabel('Count')
278
- plt.title('Sentiment Counts')
279
- plt.xticks(rotation=45, ha='right')
280
- plt.tight_layout()
281
-
282
- # Save the bar chart as an image file
283
- plt.savefig('sentiment_counts_chart.png')
284
-
285
- # Offer the image file for download
286
- st.download_button(label='Download Sentiment Counts Chart Image', data=open('sentiment_counts_chart.png', 'rb').read(), file_name='sentiment_counts_chart.png', mime='image/png')
287
-
288
- elif result_option == "Keyword Frequency":
289
- st.subheader("Keyword Frequency")
290
-
291
- # List of keywords
292
- keywords = [
293
- "delivery",
294
- "shipping",
295
- "parcel",
296
- "package",
297
- "tracking",
298
- "shipment",
299
- "cargo",
300
- "freight",
301
- "automation",
302
- "automated",
303
- "robotic",
304
- "robots",
305
- "AI",
306
- "artificial intelligence",
307
- "machine learning",
308
- "chatbot",
309
- "virtual assistant",
310
- "customer support",
311
- "real-time",
312
- "instant",
313
- "live update",
314
- "status",
315
- "IoT",
316
- "internet of things",
317
- "connected devices",
318
- "smart technology",
319
- "blockchain",
320
- "ledger",
321
- "transparency",
322
- "security",
323
- "sustainability",
324
- "eco-friendly",
325
- "green logistics",
326
- "carbon footprint",
327
- "customer service",
328
- "support",
329
- "experience",
330
- "satisfaction",
331
- "data analytics",
332
- "big data",
333
- "analysis",
334
- "insights",
335
- "cloud computing",
336
- "cloud-based",
337
- "digital infrastructure",
338
- "storage",
339
- "5G",
340
- "connectivity",
341
- "network speed",
342
- "wireless",
343
- "drone",
344
- "aerial delivery",
345
- "UAV",
346
- "drone shipping",
347
- "augmented reality",
348
- "AR",
349
- "virtual reality",
350
- "VR",
351
- "3D printing",
352
- "additive manufacturing",
353
- "custom parts",
354
- "prototyping",
355
- "inventory management",
356
- "stock levels",
357
- "warehouse management",
358
- "storage solutions",
359
- "supply chain",
360
- "logistics",
361
- "supply network",
362
- "distribution",
363
- "eco-packaging",
364
- "sustainable materials",
365
- "recycling",
366
- "waste reduction",
367
- "digital platform",
368
- "e-commerce",
369
- "online shopping",
370
- "online order",
371
- "cybersecurity",
372
- "data protection",
373
- "privacy",
374
- "encryption",
375
- "predictive modeling",
376
- "forecasting",
377
- "demand planning",
378
- "trend analysis",
379
- "robotics",
380
- "automated vehicles",
381
- "self-driving cars",
382
- "logistics automation",
383
- "visibility",
384
- "supply chain visibility",
385
- "track and trace",
386
- "monitoring",
387
- "integration",
388
- "ERP",
389
- "supply chain integration",
390
- "software",
391
- "optimization",
392
- "efficiency",
393
- "process improvement",
394
- "lean logistics",
395
- "personalization",
396
- "customization",
397
- "tailored services",
398
- "personal touch",
399
- "ethical sourcing",
400
- "fair trade",
401
- "labor rights",
402
- "ethical business",
403
- "user experience",
404
- "UX",
405
- "customer journey",
406
- "service design",
407
- "visibility",
408
- ]
409
- text_data = " ".join(combined_df["review_text"])
410
- keyword_frequency = (
411
- pd.Series(text_data.split()).value_counts().reset_index()
412
- )
413
- keyword_frequency.columns = ["Keyword", "Frequency"]
414
-
415
- # Filter keyword frequency for specific keywords
416
- filtered_keyword_frequency = keyword_frequency[
417
- keyword_frequency["Keyword"].isin(keywords)
418
- ]
419
-
420
- # Display bar chart for filtered keyword frequency
421
- st.bar_chart(filtered_keyword_frequency.set_index("Keyword"))
422
-
423
- # Add a download button
424
- if st.button('Download Keyword Frequency Chart'):
425
- # Plot the bar chart
426
- fig, ax = plt.subplots()
427
- filtered_keyword_frequency.plot(kind='bar', x='Keyword', y='Frequency', ax=ax)
428
- plt.xticks(rotation=45, ha='right')
429
- plt.tight_layout()
430
-
431
- # Save the bar chart as an image file
432
- plt.savefig('keyword_frequency_chart.png')
433
-
434
- # Offer the image file for download
435
- st.download_button(label='Download Keyword Frequency Chart Image', data=open('keyword_frequency_chart.png', 'rb').read(), file_name='keyword_frequency_chart.png', mime='image/png')
436
- elif result_option == "Word Cloud":
437
- st.subheader("Word Cloud")
438
- text_data = " ".join(combined_df["review_text"])
439
- display_wordcloud(text_data)
440
  else:
441
- st.subheader("Comparative Sentiment Analysis")
442
- supply_chain_areas = {
443
- 'logistics': ['logistics', 'supply chain', 'cargo', 'shipment', 'freight', 'package', 'tracking'],
444
- 'delivery': ['delivery', 'shipping', 'courier', 'postal', 'parcel'],
445
- 'inventory': ['inventory', 'stock', 'storage', 'warehouse', 'security’'],
446
- 'customer service': ['customer service', 'support', 'helpdesk', 'service center', 'experience', 'refund'],
447
- 'procurement': ['procurement', 'sourcing', 'purchasing', 'buying', 'order'],
448
- 'distribution': ['distribution', 'supply network', 'distribution center'],
449
- 'manufacturing': ['manufacturing', 'production', 'assembly', 'quality', 'defect']
450
- }
451
-
452
- supply_chain_area = st.sidebar.radio(
453
- "Select Supply Chain Area",
454
- ("logistics", "delivery", "inventory", "customer service", "procurement", "distribution","manufacturing"),
455
- )
456
- # Call the function to count occurrences of keywords and sentiment distribution
457
- keyword_counts = count_reviews_with_keywords(combined_df,supply_chain_areas[supply_chain_area])
458
-
459
- # Convert keyword_counts to DataFrame
460
- df_counts = pd.DataFrame(keyword_counts).transpose()
461
-
462
- # Plot dual bar chart horizontally
463
- st.bar_chart(df_counts[["Positive", "Negative"]], use_container_width=True, height=500)
 
15
 
16
  def analyze_hf_sentiment(text):
17
  if len(text) > 512:
18
+ text = text[:511]
 
19
  result = hf_sentiment_analyzer(text)
20
  label = result[0]["label"]
21
  if label == "LABEL_1":
 
64
  ax.axis("equal")
65
  st.pyplot(fig)
66
 
 
 
 
 
 
 
 
 
67
  # Function to display word cloud
68
  def display_wordcloud(text_data):
69
  wordcloud = WordCloud(width=800, height=400, background_color="white").generate(
 
74
  ax.axis("off")
75
  st.pyplot(fig)
76
 
 
 
 
 
 
 
 
 
77
  # Function to download CSV file
78
  def download_csv(df):
79
  csv = df.to_csv(index=False)
 
81
  href = f'<a href="data:file/csv;base64,{b64}" download="sentiment_analysis_results.csv">Download CSV File</a>'
82
  st.markdown(href, unsafe_allow_html=True)
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  # Streamlit UI
85
+ st.set_page_config(page_title="Sentiment Analysis App", page_icon=":smiley:")
86
+ st.title("Sentiment Analysis App")
87
 
88
  # Sidebar
89
  st.sidebar.title("Options")
90
+ input_option = st.sidebar.select_slider("Select Input Option", options=["Free Text", "CSV Files"])
91
+ selected_model = st.sidebar.select_slider(
92
+ "Select Sentiment Analysis Model", options=["VADER", "TextBlob", "Hugging Face"]
93
  )
94
+ result_option = st.sidebar.select_slider(
95
  "Select Result Display Option",
96
+ options=["DataFrame", "Pie Chart", "Bar Chart", "Keyword Frequency", "Word Cloud", "Comparative Sentiment Analysis"],
97
  )
98
 
99
  # Main content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  if input_option == "Free Text":
101
  st.subheader("Enter review for sentiment analysis:")
102
+ user_input = st.text_input("", placeholder="Enter your text here")
103
+ if st.button('Analyze'):
104
+ if user_input:
105
+ with st.spinner("Analyzing..."):
106
+ if selected_model == "Hugging Face":
107
+ result = analyze_hf_sentiment(user_input)
108
+ elif selected_model == "VADER":
109
+ result = analyze_vader_sentiment(user_input)
110
+ elif selected_model == "TextBlob":
111
+ result = analyze_textblob_sentiment(user_input)
112
+ st.write("Sentiment:", result)
113
+ else:
114
+ st.error("Please enter some text to analyze.")
115
 
116
  if input_option == "CSV Files":
117
+ st.subheader("Upload CSV files for sentiment analysis:")
118
+ uploaded_files = st.file_uploader("Choose a CSV file", accept_multiple_files=True)
119
+ if st.button('Start Analysis'):
120
+ if uploaded_files:
121
+ for uploaded_file in uploaded_files:
122
+ df = pd.read_csv(uploaded_file)
123
+ if 'review_text' in df.columns:
124
+ df['Sentiment'] = df['review_text'].apply(lambda x: analyze_hf_sentiment(x) if selected_model == "Hugging Face" else (analyze_vader_sentiment(x) if selected_model == "VADER" else analyze_textblob_sentiment(x)))
125
+ if result_option == "DataFrame":
126
+ display_dataframe(df)
127
+ elif result_option == "Pie Chart":
128
+ display_pie_chart(df, 'Sentiment')
129
+ elif result_option == "Word Cloud":
130
+ combined_text = ' '.join(df['review_text'])
131
+ display_wordcloud(combined_text)
 
 
 
 
132
  else:
133
+ st.error("CSV must contain 'review_text' column.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  else:
135
+ st.error("Please upload a CSV file.")