Krittaprot
committed on
Commit
•
379ee2c
1
Parent(s):
f6aaf96
Update app.py
Browse files
Update sentiment analysis processing to be performed in batch; this should speed things up.
app.py
CHANGED
@@ -13,7 +13,6 @@ import time
|
|
13 |
|
14 |
|
15 |
sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
|
16 |
-
# text_summarization_task = pipeline("summarization", model="facebook/bart-large-cnn")
|
17 |
|
18 |
def extract_youtube_video_id(url_or_id):
|
19 |
"""
|
@@ -133,8 +132,19 @@ def comments_analyzer(comments_df):
|
|
133 |
if comments_df is None:
|
134 |
return None
|
135 |
else:
|
136 |
-
comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
|
137 |
-
comments_df['score'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['score'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
data = {}
|
140 |
#Categorize the comments by sentiment and count them
|
@@ -267,9 +277,6 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
|
|
267 |
|
268 |
start_time = time.time()
|
269 |
|
270 |
-
# # Text Summarization
|
271 |
-
# summarized_text = text_summarization_task(long_text, min_length=100, max_length=200, truncation=True)[0]['summary_text']
|
272 |
-
|
273 |
end_time = time.time()
|
274 |
print(f"Time taken for summarizing comments: {end_time - start_time} seconds")
|
275 |
|
|
|
13 |
|
14 |
|
15 |
sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
|
|
|
16 |
|
17 |
def extract_youtube_video_id(url_or_id):
|
18 |
"""
|
|
|
132 |
if comments_df is None:
|
133 |
return None
|
134 |
else:
|
135 |
+
# comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
|
136 |
+
# comments_df['score'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['score'])
|
137 |
+
|
138 |
+
# Example of batch processing
|
139 |
+
batch_size = 20 # You can adjust this size based on your system's capabilities
|
140 |
+
sentiments = []
|
141 |
+
|
142 |
+
for i in range(0, len(comments_df), batch_size):
|
143 |
+
batch = comments_df['content'][i:i+batch_size].tolist()
|
144 |
+
batch_sentiments = [item['label'] for item in sentiment_task(batch)]
|
145 |
+
sentiments.extend(batch_sentiments)
|
146 |
+
|
147 |
+
comments_df['sentiment'] = sentiments
|
148 |
|
149 |
data = {}
|
150 |
#Categorize the comments by sentiment and count them
|
|
|
277 |
|
278 |
start_time = time.time()
|
279 |
|
|
|
|
|
|
|
280 |
end_time = time.time()
|
281 |
print(f"Time taken for summarizing comments: {end_time - start_time} seconds")
|
282 |
|