Krittaprot
committed on
Commit
•
6d3a753
1
Parent(s):
4fdac55
Fix bugs
Browse files
app.py
CHANGED
@@ -13,8 +13,6 @@ import time
|
|
13 |
|
14 |
|
15 |
sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
|
16 |
-
# summarization_task = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
17 |
-
|
18 |
|
19 |
def extract_youtube_video_id(url_or_id):
|
20 |
"""
|
@@ -144,7 +142,7 @@ def comments_analyzer(comments_df):
|
|
144 |
for i in range(0, len(comments_df), batch_size):
|
145 |
batch = comments_df['content'][i:i+batch_size].tolist()
|
146 |
batch_results = sentiment_task(batch)
|
147 |
-
|
148 |
# Extracting both sentiment labels and scores
|
149 |
batch_sentiments = [item['label'] for item in batch_results]
|
150 |
batch_scores = [item['score'] for item in batch_results]
|
@@ -163,32 +161,19 @@ def comments_analyzer(comments_df):
|
|
163 |
top_comments = filtered_comments.nlargest(top_n, 'score')
|
164 |
|
165 |
if not top_comments.empty:
|
166 |
-
return '\n\n'.join(f"{row['content']}" for _, row in top_comments.iterrows())
|
167 |
else:
|
168 |
return f"No {sentiment_type} comments available."
|
169 |
|
170 |
start_time = time.time()
|
171 |
# Get top positive comments
|
172 |
top_positive_comments = get_top_comments(comments_df, 'positive')
|
|
|
173 |
# Get top negative comments
|
174 |
top_negative_comments = get_top_comments(comments_df, 'negative')
|
175 |
end_time = time.time()
|
176 |
print(f"Time taken for finding top n positive/negative comments: {end_time - start_time} seconds")
|
177 |
|
178 |
-
# #Summarize the texts from positive and negative comments
|
179 |
-
# start_time = time.time()
|
180 |
-
# if top_positive_comments == "No positive comments available.":
|
181 |
-
# top_positive_comments_summary = top_positive_comments
|
182 |
-
# else:
|
183 |
-
# top_positive_comments_summary = summarization_task(top_positive_comments)[0]['summary_text']
|
184 |
-
|
185 |
-
# if top_negative_comments == "No negative comments available.":
|
186 |
-
# top_negative_comments_summary = top_negative_comments
|
187 |
-
# else:
|
188 |
-
# top_negative_comments_summary = summarization_task(top_negative_comments)[0]['summary_text']
|
189 |
-
# end_time = time.time()
|
190 |
-
# print(f"Time taken for summarizing the top n positive/negative comments: {end_time - start_time} seconds")
|
191 |
-
|
192 |
data = {}
|
193 |
#Categorize the comments by sentiment and count them
|
194 |
data['total_comments'] = len(comments_df)
|
@@ -298,14 +283,14 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
|
|
298 |
|
299 |
end_time = time.time()
|
300 |
print(f"Time taken for loading comments: {end_time - start_time} seconds")
|
301 |
-
|
302 |
# Analyze
|
303 |
-
analysis_dict, top_positive_comments,
|
304 |
|
305 |
long_text = analysis_dict['blended_comments']
|
306 |
|
307 |
start_time = time.time()
|
308 |
-
|
309 |
# Generate word cloud
|
310 |
word_cloud_img = generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps'])
|
311 |
|
@@ -321,7 +306,7 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
|
|
321 |
print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
|
322 |
|
323 |
# Return the generated word cloud image, summary text, and sentiment analysis chart
|
324 |
-
return word_cloud_img,
|
325 |
|
326 |
############################################################################################################################################
|
327 |
# Gradio interface
|
|
|
13 |
|
14 |
|
15 |
sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
|
|
|
|
|
16 |
|
17 |
def extract_youtube_video_id(url_or_id):
|
18 |
"""
|
|
|
142 |
for i in range(0, len(comments_df), batch_size):
|
143 |
batch = comments_df['content'][i:i+batch_size].tolist()
|
144 |
batch_results = sentiment_task(batch)
|
145 |
+
|
146 |
# Extracting both sentiment labels and scores
|
147 |
batch_sentiments = [item['label'] for item in batch_results]
|
148 |
batch_scores = [item['score'] for item in batch_results]
|
|
|
161 |
top_comments = filtered_comments.nlargest(top_n, 'score')
|
162 |
|
163 |
if not top_comments.empty:
|
164 |
+
return '\n\n'.join(f"{row['content']} - {row['author']}" for _, row in top_comments.iterrows())
|
165 |
else:
|
166 |
return f"No {sentiment_type} comments available."
|
167 |
|
168 |
start_time = time.time()
|
169 |
# Get top positive comments
|
170 |
top_positive_comments = get_top_comments(comments_df, 'positive')
|
171 |
+
|
172 |
# Get top negative comments
|
173 |
top_negative_comments = get_top_comments(comments_df, 'negative')
|
174 |
end_time = time.time()
|
175 |
print(f"Time taken for finding top n positive/negative comments: {end_time - start_time} seconds")
|
176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
data = {}
|
178 |
#Categorize the comments by sentiment and count them
|
179 |
data['total_comments'] = len(comments_df)
|
|
|
283 |
|
284 |
end_time = time.time()
|
285 |
print(f"Time taken for loading comments: {end_time - start_time} seconds")
|
286 |
+
|
287 |
# Analyze
|
288 |
+
analysis_dict, top_positive_comments, top_negative_comments = comments_analyzer(comments_df)
|
289 |
|
290 |
long_text = analysis_dict['blended_comments']
|
291 |
|
292 |
start_time = time.time()
|
293 |
+
|
294 |
# Generate word cloud
|
295 |
word_cloud_img = generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps'])
|
296 |
|
|
|
306 |
print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
|
307 |
|
308 |
# Return the generated word cloud image, summary text, and sentiment analysis chart
|
309 |
+
return word_cloud_img, top_positive_comments, top_negative_comments, sentiment_chart
|
310 |
|
311 |
############################################################################################################################################
|
312 |
# Gradio interface
|