Lagstill committed
Commit
e6dc112
•
1 Parent(s): c50cb6c

dashboard added

Files changed (1)
  1. dashboard.py +111 -0
dashboard.py ADDED
@@ -0,0 +1,111 @@
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ import praw
+ import datetime as dt
+ from wordcloud import WordCloud, STOPWORDS
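+
+ # PRAW client for pulling data straight from the Reddit API. The credentials
+ # are hardcoded here; loading them from st.secrets or environment variables
+ # would be the safer pattern. (The client is not referenced again in this
+ # script, which reads from the pre-scraped CSV below.)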
+ reddit = praw.Reddit(client_id='w0cDom4nIf5druip4y9zSw',
+                      client_secret='mtCul8hEucwNky7hLwgkewlLPzH0sg',
+                      user_agent='Profile extractor',
+                      username='CarelessSwordfish541',
+                      password='Testing@2022')
+
+ st.title('Just Reddit as it is 👀')
+
+ st.write('This is a simple web app to extract data from Reddit and analyze it.')
+
+ DATA_URL = 'subreddit_data_v1.csv'
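+
+
+ # subreddit_data_v1.csv is presumably a pre-scraped snapshot of subreddit
+ # posts (subreddit, title, created, score, num_comments). Caching the loader
+ # keeps Streamlit from re-reading the CSV on every rerun; the rename
+ # normalizes all column names to lowercase.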
+ @st.cache
+ def load_data():
+     data = pd.read_csv(DATA_URL)
+     lowercase = lambda x: str(x).lower()
+     data.rename(lowercase, axis='columns', inplace=True)
+     return data
+
+ data_load_state = st.text('Loading data...')
+ data = load_data()
+ data_load_state.text("Done! (using st.cache)")
+
+
+ if st.checkbox('Show raw data'):
+     st.subheader('Raw data')
+     st.write(data)
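+
+ # The subreddit chosen here filters every table and chart below.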
+ subreddit = st.selectbox('Select a subreddit', data['subreddit'].unique())
+
+ st.subheader('Wordcloud of the most common words in the subreddit')
+
+
+ comment_words = ''
+ stopwords = set(STOPWORDS)
+
+ # iterate over the titles in the selected subreddit
+ for val in data[data['subreddit'] == subreddit]['title']:
+     # typecast each val to string
+     val = str(val)
+
+     # split the value into tokens
+     tokens = val.split()
+
+     # convert each token to lowercase
+     for i in range(len(tokens)):
+         tokens[i] = tokens[i].lower()
+
+     comment_words += " ".join(tokens) + " "
+
+ wordcloud = WordCloud(width=800, height=800,
+                       background_color='white',
+                       stopwords=stopwords,
+                       min_font_size=10).generate(comment_words)
+
+ # plot the WordCloud image
+ plt.figure(figsize=(8, 8), facecolor=None)
+ plt.imshow(wordcloud)
+ plt.axis("off")
+ plt.tight_layout(pad=0)
+ st.set_option('deprecation.showPyplotGlobalUse', False)
+ st.pyplot()
78
+
79
+
80
+ #Based on the subreddit selected , show the statistics of the subreddit
81
+ st.subheader('Statistics of the subreddit')
82
+ st.write(data[data['subreddit'] == subreddit].describe())
83
+
84
+ #Based on the subreddit selected display the number of posts per day
85
+ st.subheader('Number of posts per day')
86
+ st.write(data[data['subreddit'] == subreddit].groupby('created')['title'].count())
87
+
88
+ #Based on the subreddit selected display the number of comments per day
89
+ st.subheader('Number of comments per day')
90
+ st.write(data[data['subreddit'] == subreddit].groupby('created')['num_comments'].sum())
91
+
92
+ #display a bar chart of the score of the posts
93
+ st.subheader('Score of the posts')
94
+ st.bar_chart(data[data['subreddit'] == subreddit]['score'])
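+
+ # Note: the per-day groupings above assume 'created' already holds a
+ # date-level value; raw Unix timestamps would need converting first.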
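+
+ # To preview the dashboard locally: streamlit run dashboard.py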