kajalag committed on
Commit
d4e0f70
1 Parent(s): 7571eef

Upload helper.py

Browse files
Files changed (1) hide show
  1. helper.py +134 -0
helper.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ from urlextract import URLExtract
3
+ from collections import Counter
4
+ from wordcloud import WordCloud, STOPWORDS ,ImageColorGenerator
5
+ import pandas as pd
6
+ import matplotlib.pylab as plt
7
+ import PIL.Image
8
+ import numpy as np
9
+ import emoji
10
+
11
+ extract=URLExtract()
12
def fetch_stats(selected_user, df):
    """Summarise a chat as message, word and link totals.

    Args:
        selected_user: A value from the ``users`` column, or the literal
            ``"Group analysis"`` to keep every row.
        df: Chat DataFrame with ``users`` and ``message`` columns.

    Returns:
        A ``(num_messages, num_words, num_links)`` tuple of ints.
    """
    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]

    messages = df['message']

    # Words are whitespace-separated tokens of every message.
    all_words = [token for text in messages for token in text.split()]

    # Links are whatever the module-level URL extractor finds in each message.
    all_links = [url for text in messages for url in extract.find_urls(text)]

    return df.shape[0], len(all_words), len(all_links)
27
+
28
def most_busy_users(df):
    """Return the most active senders and every sender's share of messages.

    Args:
        df: Chat DataFrame with a ``users`` column (one row per message).

    Returns:
        A ``(top_counts, share_df)`` tuple:

        * ``top_counts`` -- ``df['users'].value_counts().head()``, i.e. the
          message counts of the most frequent users.
        * ``share_df`` -- DataFrame with the columns ``name`` and
          ``percent``: each user's percentage of all rows, rounded to two
          decimals, ordered from most to least active.
    """
    counts = df['users'].value_counts()

    # Name both output columns explicitly instead of renaming whatever
    # ``reset_index()`` happens to produce: the default column names differ
    # between pandas versions, and the previous rename map looked for a
    # ``user`` column although the source column is called ``users``.
    share_df = (
        (counts / df.shape[0] * 100)
        .round(2)
        .rename_axis('name')
        .reset_index(name='percent')
    )
    return counts.head(), share_df
33
+
34
def most_common_words(selected_user, df, stop_words_path='stop_hinglish.txt'):
    """Return the 30 most frequent non-stop-words as a two-column DataFrame.

    Args:
        selected_user: A value from the ``users`` column, or the literal
            ``"Group analysis"`` to keep every user.
        df: Chat DataFrame with ``users`` and ``message`` columns.
        stop_words_path: Path of a whitespace-separated stop-word list.
            Defaults to ``stop_hinglish.txt`` in the working directory.

    Returns:
        ``pd.DataFrame(Counter(words).most_common(30))`` -- column ``0`` holds
        the word, column ``1`` its count. Rows whose user is
        ``group_notification`` and ``<Media omitted>`` placeholder messages
        are ignored.

    Raises:
        OSError: If the stop-word file cannot be opened.
    """
    # Read the list inside a context manager so the handle is always closed,
    # and split it into a set: testing ``word in <file text>`` is a substring
    # check that would also discard any word contained in a longer stop word.
    with open(stop_words_path, 'r', encoding='utf-8') as f:
        stop_words = set(f.read().split())

    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]
    temp = df[df['users'] != 'group_notification']
    temp = temp[temp['message'] != '<Media omitted>\n']

    words = [
        word
        for message in temp['message']
        for word in message.lower().split()
        if word not in stop_words
    ]
    return pd.DataFrame(Counter(words).most_common(30))
51
+
52
def word_cloud(selected_user, df):
    """Build, display and return a word cloud of the selected messages.

    Args:
        selected_user: A value from the ``users`` column, or the literal
            ``"Group analysis"`` to use every row.
        df: Chat DataFrame with ``users`` and ``message`` columns.

    Returns:
        The generated ``WordCloud`` object. As a side effect a new matplotlib
        figure is created, the cloud is drawn on it and ``plt.show()`` is
        called.
    """
    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]

    # Copy the library's stop-word collection. ``set('STOPWORDS')`` would
    # instead build a set of the individual letters of that string literal,
    # so no real stop word would ever be filtered.
    stopwords = set(STOPWORDS)

    # Join with a space so the last word of one message cannot fuse with the
    # first word of the next when a message has no trailing whitespace.
    text = ' '.join(df['message'])

    wordcloud = WordCloud(stopwords=stopwords, background_color="Black").generate(text)
    plt.figure(figsize=(10, 8), facecolor='k')
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.show()

    return wordcloud
65
+
66
def emoji_helper(selected_user, df):
    """Tally the emoji characters used in the selected messages.

    Args:
        selected_user: A value from the ``users`` column, or the literal
            ``"Group analysis"`` to use every row.
        df: Chat DataFrame with ``users`` and ``message`` columns.

    Returns:
        A DataFrame built from ``Counter.most_common()``: column ``0`` is the
        character, column ``1`` its count, most frequent first. Only single
        characters that are keys of ``emoji.EMOJI_DATA`` are counted.
    """
    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]

    tally = Counter()
    for text in df['message']:
        # Messages are scanned character by character.
        tally.update(ch for ch in text if ch in emoji.EMOJI_DATA)

    return pd.DataFrame(tally.most_common())
75
+
76
def monthly_timeline(selected_user, df):
    """Count messages per (year, month) and label each bucket.

    Args:
        selected_user: A value from the ``users`` column, or the literal
            ``"Group analysis"`` to use every row.
        df: Chat DataFrame with ``users``, ``year``, ``Month_name``,
            ``Month`` and ``message`` columns.

    Returns:
        DataFrame with the columns ``year``, ``Month_name``, ``Month``,
        ``message`` (the per-bucket message count) and ``time`` (the string
        ``"<Month_name>-<year>"``), indexed ``0..n-1``.
    """
    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]

    timeline = df.groupby(['year', 'Month_name', 'Month'])['message'].count().reset_index()

    # ``groupby`` sorts by its keys in order, so the buckets come back with the
    # months of a year in alphabetical ``Month_name`` order. Re-sort by
    # ``Month`` within each year so the timeline reads chronologically.
    # NOTE(review): presumably ``Month`` is the numeric month -- confirm
    # against the preprocessing code that builds this DataFrame.
    timeline = timeline.sort_values(['year', 'Month']).reset_index(drop=True)

    timeline['time'] = [
        f"{month_name}-{year}"
        for month_name, year in zip(timeline['Month_name'], timeline['year'])
    ]
    return timeline
87
def Daily_timeline(selected_user, df):
    """Count messages per calendar date.

    Args:
        selected_user: A value from the ``users`` column, or the literal
            ``"Group analysis"`` to use every row.
        df: Chat DataFrame with ``users``, ``Date`` and ``message`` columns.

    Returns:
        DataFrame with a ``Date`` column and a ``message`` column holding the
        number of messages on that date.
    """
    if selected_user == "Group analysis":
        scoped = df
    else:
        scoped = df[df['users'] == selected_user]

    per_day = scoped.groupby('Date')['message'].count()
    return per_day.reset_index()
94
+
95
def week_activity_map(selected_user, df):
    """Return how many messages fall on each value of ``Day_name``.

    Rows are restricted to ``selected_user`` unless it equals
    ``"Group analysis"``. The result is the ``value_counts()`` Series of the
    ``Day_name`` column.
    """
    if selected_user == "Group analysis":
        rows = df
    else:
        rows = df[df['users'] == selected_user]

    day_counts = rows['Day_name'].value_counts()
    return day_counts
99
+
100
def month_activity_map(selected_user, df):
    """Return how many messages fall in each value of ``Month_name``.

    Rows are restricted to ``selected_user`` unless it equals
    ``"Group analysis"``. The result is the ``value_counts()`` Series of the
    ``Month_name`` column.
    """
    if selected_user == "Group analysis":
        rows = df
    else:
        rows = df[df['users'] == selected_user]

    month_counts = rows['Month_name'].value_counts()
    return month_counts
104
+
105
def activity_heatmap(selected_user, df):
    """Pivot message counts into a ``Day_name`` x ``period`` grid.

    Args:
        selected_user: A value from the ``users`` column, or the literal
            ``"Group analysis"`` to use every row.
        df: Chat DataFrame with ``users``, ``Day_name``, ``period`` and
            ``message`` columns.

    Returns:
        DataFrame indexed by ``Day_name`` with one column per ``period``;
        each cell is the message count, with missing combinations set to 0.
    """
    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]

    grid = df.pivot_table(
        index='Day_name',
        columns='period',
        values='message',
        aggfunc='count',
    )
    return grid.fillna(0)
111
+
112
def pos_words(selected_user, df):
    """Return the messages whose ``vader_Analysis`` label is ``Positive``.

    Rows are restricted to ``selected_user`` unless it equals
    ``"Group analysis"``. The result is the ``message`` column (a Series) of
    the matching rows, keeping their original index labels.
    """
    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]

    is_positive = df['vader_Analysis'] == 'Positive'
    return df.loc[is_positive, 'message']
119
+
120
def neg_words(selected_user, df):
    """Return the messages whose sentiment label is ``Negative``.

    Args:
        selected_user: A value from the ``users`` column, or the literal
            ``"Group analysis"`` to use every row.
        df: Chat DataFrame with ``users`` and ``message`` columns plus a
            sentiment-label column (``vader_Analysis`` or ``Analysis``).

    Returns:
        The ``message`` column (a Series) of the rows labelled ``Negative``,
        keeping their original index labels.

    Raises:
        KeyError: If neither sentiment-label column is present.
    """
    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]

    # The positive/neutral helpers in this module read ``vader_Analysis``;
    # use the same column here so all three agree. ``Analysis`` is kept as a
    # fallback for frames that only carry that column.
    label_col = 'vader_Analysis' if 'vader_Analysis' in df.columns else 'Analysis'

    is_negative = df[label_col] == 'Negative'
    return df.loc[is_negative, 'message']
127
+
128
def neu_words(selected_user, df):
    """Return the messages whose ``vader_Analysis`` label is ``Neutral``.

    Rows are restricted to ``selected_user`` unless it equals
    ``"Group analysis"``. The result is the ``message`` column (a Series) of
    the matching rows, keeping their original index labels.
    """
    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]

    is_neutral = df['vader_Analysis'] == 'Neutral'
    return df.loc[is_neutral, 'message']