Commit fccd4a8
Parent(s): cd80221
first

Files changed:
- app.py +113 -0
- helper_functions.py +255 -0
- static/en_stopwords.txt +102 -0
- static/en_stopwords_ngram.txt +134 -0
- static/quartzo.ttf +0 -0
- static/twitter_mask.png +0 -0
app.py
ADDED
@@ -0,0 +1,113 @@
import streamlit as st

st.set_page_config(
    page_title="Social Media Sentiment Analyzer", page_icon="📊", layout="wide"
)

import pandas as pd
import helper_functions as hf
import plotly.express as px
import plotly.io as pio
import plotly


# Whenever the search button is clicked, the search_callback function is called
def search_callback():
    if twitter_agree:
        if len(st.session_state.search_term) == 0:
            st.error("Please enter a search term")
            return
        st.session_state.df = hf.get_tweets(
            st.session_state.search_term, st.session_state.num_tweets
        )
        st.session_state.df = hf.get_sentiment(st.session_state.df)


def twitter_form():
    with st.form(key="search_form"):
        st.subheader("Search Parameters")
        st.text_input(
            "Enter a User handle (like _@elonmusk_), Hashtag (like _#Bitcoin_) or Topic (like _climate change_)",
            key="search_term",
        )
        st.slider("Number of tweets", min_value=100, max_value=500, key="num_tweets")
        st.form_submit_button(label="Search", on_click=search_callback)
        st.markdown(
            "Note: it may take a while to load the results, especially with a large number of tweets"
        )


with st.sidebar:
    st.title("Social Media Sentiment Analyzer")
    st.subheader("Choose your platform")
    twitter_agree = st.checkbox("Twitter")

    if twitter_agree:
        twitter_form()

    st.markdown(
        "<div style='position: fixed; bottom: 0;'>Created by Taaha Bajwa</div>",
        unsafe_allow_html=True,
    )

if "df" in st.session_state:

    def make_dashboard(tweet_df, bar_color, wc_color):
        # first row: sentiment pie, top unigrams, top bigrams
        col1, col2, col3 = st.columns([28, 34, 38])
        with col1:
            sentiment_plot = hf.plot_sentiment(tweet_df)
            sentiment_plot.update_layout(height=350, title_x=0.5)
            st.plotly_chart(sentiment_plot, theme=None, use_container_width=True)
        with col2:
            top_unigram = hf.get_top_n_gram(tweet_df, ngram_range=(1, 1), n=10)
            unigram_plot = hf.plot_n_gram(
                top_unigram, title="Top 10 Occurring Words", color=bar_color
            )
            unigram_plot.update_layout(height=350)
            st.plotly_chart(unigram_plot, theme=None, use_container_width=True)
        with col3:
            top_bigram = hf.get_top_n_gram(tweet_df, ngram_range=(2, 2), n=10)
            bigram_plot = hf.plot_n_gram(
                top_bigram, title="Top 10 Occurring Bigrams", color=bar_color
            )
            bigram_plot.update_layout(height=350)
            st.plotly_chart(bigram_plot, theme=None, use_container_width=True)

        # second row: color-coded tweet table and word cloud
        col1, col2 = st.columns([60, 40])
        with col1:

            def sentiment_color(sentiment):
                if sentiment == "Positive":
                    return "background-color: #54A24B; color: white"
                elif sentiment == "Negative":
                    return "background-color: #FF7F0E"
                else:
                    return "background-color: #1F77B4"

            st.dataframe(
                tweet_df[["Sentiment", "Tweet"]].style.applymap(
                    sentiment_color, subset=["Sentiment"]
                ),
                height=350,
            )
        with col2:
            wordcloud = hf.plot_wordcloud(tweet_df, colormap=wc_color)
            st.pyplot(wordcloud)

    # enlarge the tab labels
    adjust_tab_font = """
    <style>
    button[data-baseweb="tab"] > div[data-testid="stMarkdownContainer"] > p {
        font-size: 20px;
    }
    </style>
    """
    st.write(adjust_tab_font, unsafe_allow_html=True)

    tab1, tab2, tab3, tab4 = st.tabs(["All", "Positive 😊", "Negative ☹️", "Neutral 😐"])
    with tab1:
        tweet_df = st.session_state.df
        make_dashboard(tweet_df, bar_color="#1F77B4", wc_color="Blues")
    with tab2:
        tweet_df = st.session_state.df.query("Sentiment == 'Positive'")
        make_dashboard(tweet_df, bar_color="#54A24B", wc_color="Greens")
    with tab3:
        tweet_df = st.session_state.df.query("Sentiment == 'Negative'")
        make_dashboard(tweet_df, bar_color="#FF7F0E", wc_color="Oranges")
    with tab4:
        tweet_df = st.session_state.df.query("Sentiment == 'Neutral'")
        make_dashboard(tweet_df, bar_color="#1F77B4", wc_color="Blues")
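The UI above is a thin layer over the helpers: the search callback fetches tweets, scores them, and stores the result in st.session_state.df, which the dashboard then slices per sentiment tab. As a rough sketch, the same data flow can be exercised without the Streamlit front end (assuming snscrape can still reach Twitter's public endpoints; the query string and output filename here are just illustrative):

import helper_functions as hf

# Scrape up to 100 tweets and attach a "Sentiment" column via FinBERT
df = hf.get_tweets("climate change", 100)
df = hf.get_sentiment(df)

# Build the sentiment pie and save it for inspection in a browser
fig = hf.plot_sentiment(df)
fig.write_html("sentiment.html")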
helper_functions.py
ADDED
@@ -0,0 +1,255 @@
import pandas as pd
import numpy as np
import snscrape.modules.twitter as sntwitter
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
import plotly.express as px
import plotly.io as pio
import matplotlib as mpl
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from PIL import Image


# Download the NLTK data needed for tokenization, tagging, and lemmatization
@st.cache(allow_output_mutation=True)
def get_nltk():
    import nltk

    nltk.download(
        ["punkt", "wordnet", "omw-1.4", "averaged_perceptron_tagger", "universal_tagset"]
    )


get_nltk()

from nltk.stem import WordNetLemmatizer
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize
import re
from sklearn.feature_extraction.text import CountVectorizer

# Create a custom plotly theme and set it as default
pio.templates["custom"] = pio.templates["plotly_white"]
pio.templates["custom"].layout.margin = {"b": 25, "l": 25, "r": 25, "t": 50}
pio.templates["custom"].layout.width = 600
pio.templates["custom"].layout.height = 450
pio.templates["custom"].layout.autosize = False
pio.templates["custom"].layout.font.update(
    {"family": "Arial", "size": 12, "color": "#707070"}
)
pio.templates["custom"].layout.title.update(
    {
        "xref": "container",
        "yref": "container",
        "x": 0.5,
        "yanchor": "top",
        "font_size": 16,
        "y": 0.95,
        "font_color": "#353535",
    }
)
pio.templates["custom"].layout.xaxis.update(
    {"showline": True, "linecolor": "lightgray", "title_font_size": 14}
)
pio.templates["custom"].layout.yaxis.update(
    {"showline": True, "linecolor": "lightgray", "title_font_size": 14}
)
pio.templates["custom"].layout.colorway = [
    "#1F77B4",
    "#FF7F0E",
    "#54A24B",
    "#D62728",
    "#C355FA",
    "#8C564B",
    "#E377C2",
    "#7F7F7F",
    "#FFE323",
    "#17BECF",
]
pio.templates.default = "custom"


# Load the FinBERT sentiment model once and cache it across reruns
@st.cache(allow_output_mutation=True)
def get_sentiment_model():
    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
    model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
    return tokenizer, model


tokenizer_sentiment, model_sentiment = get_sentiment_model()


def get_tweets(query, max_tweets):
    if query[0] == "@":
        # A user handle: scrape the user's own timeline
        scraper = sntwitter.TwitterSearchScraper("from:" + query[1:])
    else:
        # A hashtag or free-text topic: scrape matching tweets
        scraper = sntwitter.TwitterSearchScraper(query)

    tweets_list = []
    for i, tweet in enumerate(scraper.get_items()):
        if i >= max_tweets:
            break
        tweets_list.append([tweet.date, tweet.user.username, tweet.content])

    # Creating a dataframe from the tweets list above
    tweets_df = pd.DataFrame(tweets_list, columns=["Datetime", "Username", "Tweet"])
    tweets_df["Datetime"] = pd.to_datetime(tweets_df["Datetime"])
    tweets_df["Date"] = tweets_df["Datetime"].dt.date
    tweets_df["Time"] = tweets_df["Datetime"].dt.strftime("%H:%M")
    tweets_df.drop("Datetime", axis=1, inplace=True)
    return tweets_df


def text_preprocessing(text):
    stopwords = set()
    with open("static/en_stopwords.txt", "r") as file:
        for word in file:
            stopwords.add(word.rstrip("\n"))
    lemmatizer = WordNetLemmatizer()
    try:
        url_pattern = r"((http://)[^ ]*|(https://)[^ ]*|(www\.)[^ ]*)"
        user_pattern = r"@[^\s]+"
        entity_pattern = r"&.*;"
        neg_contraction = r"n't\W"
        non_alpha = "[^a-z]"
        cleaned_text = text.lower()
        cleaned_text = re.sub(neg_contraction, " not ", cleaned_text)
        cleaned_text = re.sub(url_pattern, " ", cleaned_text)
        cleaned_text = re.sub(user_pattern, " ", cleaned_text)
        cleaned_text = re.sub(entity_pattern, " ", cleaned_text)
        cleaned_text = re.sub(non_alpha, " ", cleaned_text)
        tokens = word_tokenize(cleaned_text)
        # provide a POS tag for lemmatization to yield better results
        word_tag_tuples = pos_tag(tokens, tagset="universal")
        tag_dict = {"NOUN": "n", "VERB": "v", "ADJ": "a", "ADV": "r"}
        final_tokens = []
        for word, tag in word_tag_tuples:
            if len(word) > 1 and word not in stopwords:
                if tag in tag_dict:
                    final_tokens.append(lemmatizer.lemmatize(word, tag_dict[tag]))
                else:
                    final_tokens.append(lemmatizer.lemmatize(word))
        return " ".join(final_tokens)
    except Exception:
        return np.nan


def get_sentiment(df):
    # Reuse the cached FinBERT weights instead of downloading the model again
    classifier = pipeline(
        "sentiment-analysis", model=model_sentiment, tokenizer=tokenizer_sentiment
    )
    output = []
    for sentence in df["Tweet"].tolist():
        output.extend(classifier(sentence))

    df_temp = pd.DataFrame.from_dict(output)
    df = pd.concat([df, df_temp], axis=1)
    df = df.rename(columns={"label": "Sentiment"})
    df["Sentiment"] = df["Sentiment"].replace(
        {"positive": "Positive", "negative": "Negative", "neutral": "Neutral"}
    )
    return df


def plot_sentiment(tweet_df):
    sentiment_count = tweet_df["Sentiment"].value_counts()
    fig = px.pie(
        values=sentiment_count.values,
        names=sentiment_count.index,
        hole=0.3,
        title="<b>Sentiment Distribution</b>",
        color=sentiment_count.index,
        color_discrete_map={
            "Positive": "#54A24B",
            "Negative": "#FF7F0E",
            "Neutral": "#1F77B4",
        },
    )
    fig.update_traces(
        textposition="inside",
        texttemplate="%{label}<br>%{value} (%{percent})",
        hovertemplate="<b>%{label}</b><br>Percentage=%{percent}<br>Count=%{value}",
    )
    fig.update_layout(showlegend=False)
    return fig


def get_top_n_gram(tweet_df, ngram_range, n=10):
    stopwords = set()
    with open("static/en_stopwords_ngram.txt", "r") as file:
        for word in file:
            stopwords.add(word.rstrip("\n"))
    stopwords = list(stopwords)
    corpus = tweet_df["Tweet"]
    vectorizer = CountVectorizer(
        analyzer="word", ngram_range=ngram_range, stop_words=stopwords
    )
    X = vectorizer.fit_transform(corpus.astype(str).values)
    words = vectorizer.get_feature_names_out()
    words_count = np.ravel(X.sum(axis=0))
    df = pd.DataFrame(zip(words, words_count), columns=["words", "counts"])
    df = df.sort_values(by="counts", ascending=False).head(n)
    df["words"] = df["words"].str.title()
    return df


def plot_n_gram(n_gram_df, title, color="#54A24B"):
    fig = px.bar(
        x=n_gram_df.counts,
        y=n_gram_df.words,
        title="<b>{}</b>".format(title),
        text_auto=True,
    )
    fig.update_layout(plot_bgcolor="white")
    fig.update_xaxes(title=None)
    fig.update_yaxes(autorange="reversed", title=None)
    fig.update_traces(hovertemplate="<b>%{y}</b><br>Count=%{x}", marker_color=color)
    return fig


def plot_wordcloud(tweet_df, colormap="Greens"):
    stopwords = set()
    with open("static/en_stopwords_ngram.txt", "r") as file:
        for word in file:
            stopwords.add(word.rstrip("\n"))
    # use a mid-range slice of the colormap so the words stay readable
    cmap = mpl.cm.get_cmap(colormap)(np.linspace(0, 1, 20))
    cmap = mpl.colors.ListedColormap(cmap[10:15])
    mask = np.array(Image.open("static/twitter_mask.png"))
    font = "static/quartzo.ttf"
    tweet_df["Cleaned_Tweet"] = tweet_df["Tweet"].apply(text_preprocessing)
    # text_preprocessing returns NaN on failure, so drop those rows before joining
    text = " ".join(tweet_df["Cleaned_Tweet"].dropna())
    wc = WordCloud(
        background_color="white",
        font_path=font,
        stopwords=stopwords,
        max_words=90,
        colormap=cmap,
        mask=mask,
        random_state=42,
        collocations=False,
        min_word_length=2,
        max_font_size=200,
    )
    wc.generate(text)
    fig = plt.figure(figsize=(8, 8))
    fig.add_subplot(1, 1, 1)
    plt.imshow(wc, interpolation="bilinear")
    plt.axis("off")
    plt.title("Wordcloud", fontdict={"fontsize": 16}, fontweight="heavy", pad=20, y=1.0)
    return fig
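For a quick sanity check of the text pipeline, the preprocessing and n-gram helpers can be run on a tiny in-memory frame; a minimal sketch, assuming the NLTK data and the static/ stopword files are in place (the example tweets below are made up):

import pandas as pd
import helper_functions as hf

toy = pd.DataFrame({"Tweet": [
    "Markets aren't looking great today https://example.com/x",
    "@someone the markets look great!",
]})

# Lowercases, expands "n't", strips URLs/handles, lemmatizes with POS tags
print(toy["Tweet"].apply(hf.text_preprocessing))

# Top unigrams across the toy corpus
print(hf.get_top_n_gram(toy, ngram_range=(1, 1), n=5))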
static/en_stopwords.txt
ADDED
@@ -0,0 +1,102 @@
if
his
our
they
can
into
an
same
himself
themselves
her
are
such
through
each
when
just
yourselves
hers
that
with
those
it
was
we
its
me
myself
ve
and
itself
does
doing
or
being
did
there
while
you
between
about
on
then
my
ourselves
by
too
at
ours
here
had
been
as
the
has
off
these
other
your
him
herself
now
is
theirs
whom
any
to
for
from
of
were
have
he
ll
be
but
until
yours
this
again
re
do
so
some
both
yourself
am
their
having
she
should
them
in
during
will
shall
could
would
ai
ca
sha
wo
static/en_stopwords_ngram.txt
ADDED
@@ -0,0 +1,134 @@
out
ll
during
had
but
own
re
there
your
ourselves
ours
whom
an
if
as
against
with
in
so
his
were
by
at
theirs
they
yourselves
yours
are
you
could
our
some
ai
myself
those
these
who
cannot
through
this
very
their
where
only
her
above
down
been
that
will
am
its
up
each
on
no
just
itself
once
be
from
sha
himself
what
for
yourself
me
while
being
is
more
here
over
my
would
why
she
he
ve
to
before
further
it
how
until
should
all
when
again
do
him
both
hers
too
most
about
same
between
such
shall
has
which
can
having
few
the
because
did
into
than
them
we
does
below
was
of
off
now
after
under
ca
any
nor
not
herself
ought
or
themselves
other
doing
then
have
and
wo
static/quartzo.ttf
ADDED
Binary file (116 kB)
static/twitter_mask.png
ADDED
Binary file (image)