faizhalas committed on
Commit
0ce9fb5
·
verified ·
1 Parent(s): 79c577b

Update pages/4 Sunburst.py

Browse files
Files changed (1) hide show
  1. pages/4 Sunburst.py +68 -63
pages/4 Sunburst.py CHANGED
@@ -37,7 +37,7 @@ st.subheader('Put your file here...', anchor=False)
37
 
38
  #===clear cache===
39
  def reset_all():
40
- st.cache_data.clear()
41
 
42
  #===check type===
43
  @st.cache_data(ttl=3600)
@@ -50,8 +50,8 @@ def upload(extype):
50
  papers = pd.read_csv(uploaded_file)
51
  #lens.org
52
  if 'Publication Year' in papers.columns:
53
- papers.rename(columns={'Publication Year': 'Year', 'Citing Works Count': 'Cited by',
54
- 'Publication Type': 'Document Type', 'Source Title': 'Source title'}, inplace=True)
55
  return papers
56
 
57
  @st.cache_data(ttl=3600)
@@ -72,68 +72,73 @@ def conv_txt(extype):
72
  uploaded_file = st.file_uploader('', type=['csv', 'txt'], on_change=reset_all)
73
 
74
  if uploaded_file is not None:
75
- extype = get_ext(uploaded_file)
76
- if extype.endswith('.csv'):
77
- papers = upload(extype)
78
-
79
- elif extype.endswith('.txt'):
80
- papers = conv_txt(extype)
81
-
82
- @st.cache_data(ttl=3600)
83
- def get_minmax(extype):
84
- extype = extype
85
- MIN = int(papers['Year'].min())
86
- MAX = int(papers['Year'].max())
87
- GAP = MAX - MIN
88
- return papers, MIN, MAX, GAP
89
-
90
- tab1, tab2 = st.tabs(["📈 Generate visualization", "📓 Recommended Reading"])
91
-
92
- with tab1:
93
- #===sunburst===
94
- try:
95
- papers, MIN, MAX, GAP = get_minmax(extype)
96
- except KeyError:
97
- st.error('Error: Please check again your columns.')
98
- sys.exit(1)
99
-
100
- if (GAP != 0):
101
- YEAR = st.slider('Year', min_value=MIN, max_value=MAX, value=(MIN, MAX), on_change=reset_all)
102
- else:
103
- st.write('You only have data in ', (MAX))
104
- YEAR = (MIN, MAX)
105
 
106
  @st.cache_data(ttl=3600)
107
- def listyear(extype):
108
- global papers
109
- years = list(range(YEAR[0],YEAR[1]+1))
110
- papers = papers.loc[papers['Year'].isin(years)]
111
- return years, papers
 
112
 
113
- @st.cache_data(ttl=3600)
114
- def vis_sunbrust(extype):
115
- papers['Cited by'] = papers['Cited by'].fillna(0)
116
- vis = pd.DataFrame()
117
- vis[['doctype','source','citby','year']] = papers[['Document Type','Source title','Cited by','Year']]
118
- viz=vis.groupby(['doctype', 'source', 'year'])['citby'].agg(['sum','count']).reset_index()
119
- viz.rename(columns={'sum': 'cited by', 'count': 'total docs'}, inplace=True)
120
-
121
- fig = px.sunburst(viz, path=['doctype', 'source', 'year'], values='total docs',
122
- color='cited by',
123
- color_continuous_scale='RdBu',
124
- color_continuous_midpoint=np.average(viz['cited by'], weights=viz['total docs']))
125
- fig.update_layout(height=800, width=1200)
126
- return fig
127
 
128
- years, papers = listyear(extype)
129
-
130
- if {'Document Type','Source title','Cited by','Year'}.issubset(papers.columns):
131
- fig = vis_sunbrust(extype)
132
- st.plotly_chart(fig, height=800, width=1200) #use_container_width=True)
133
-
134
- else:
135
- st.error('We require these columns: Document Type, Source title, Cited by, Year', icon="🚨")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
- with tab2:
138
- st.markdown('**numpy.average — NumPy v1.24 Manual. (n.d.). Numpy.Average — NumPy v1.24 Manual.** https://numpy.org/doc/stable/reference/generated/numpy.average.html')
139
- st.markdown('**Sunburst. (n.d.). Sunburst Charts in Python.** https://plotly.com/python/sunburst-charts/')
 
37
 
38
  #===clear cache===
39
  def reset_all():
40
+ st.cache_data.clear()
41
 
42
  #===check type===
43
  @st.cache_data(ttl=3600)
 
50
  papers = pd.read_csv(uploaded_file)
51
  #lens.org
52
  if 'Publication Year' in papers.columns:
53
+ papers.rename(columns={'Publication Year': 'Year', 'Citing Works Count': 'Cited by',
54
+ 'Publication Type': 'Document Type', 'Source Title': 'Source title'}, inplace=True)
55
  return papers
56
 
57
  @st.cache_data(ttl=3600)
 
72
  uploaded_file = st.file_uploader('', type=['csv', 'txt'], on_change=reset_all)
73
 
74
  if uploaded_file is not None:
75
+ try:
76
+ extype = get_ext(uploaded_file)
77
+ if extype.endswith('.csv'):
78
+ papers = upload(extype)
79
+
80
+ elif extype.endswith('.txt'):
81
+ papers = conv_txt(extype)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  @st.cache_data(ttl=3600)
84
+ def get_minmax(extype):
85
+ extype = extype
86
+ MIN = int(papers['Year'].min())
87
+ MAX = int(papers['Year'].max())
88
+ GAP = MAX - MIN
89
+ return papers, MIN, MAX, GAP
90
 
91
+ tab1, tab2 = st.tabs(["📈 Generate visualization", "📓 Recommended Reading"])
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
+ with tab1:
94
+ #===sunburst===
95
+ try:
96
+ papers, MIN, MAX, GAP = get_minmax(extype)
97
+ except KeyError:
98
+ st.error('Error: Please check again your columns.')
99
+ sys.exit(1)
100
+
101
+ if (GAP != 0):
102
+ YEAR = st.slider('Year', min_value=MIN, max_value=MAX, value=(MIN, MAX), on_change=reset_all)
103
+ else:
104
+ st.write('You only have data in ', (MAX))
105
+ YEAR = (MIN, MAX)
106
+
107
+ @st.cache_data(ttl=3600)
108
+ def listyear(extype):
109
+ global papers
110
+ years = list(range(YEAR[0],YEAR[1]+1))
111
+ papers = papers.loc[papers['Year'].isin(years)]
112
+ return years, papers
113
+
114
+ @st.cache_data(ttl=3600)
115
+ def vis_sunbrust(extype):
116
+ papers['Cited by'] = papers['Cited by'].fillna(0)
117
+ vis = pd.DataFrame()
118
+ vis[['doctype','source','citby','year']] = papers[['Document Type','Source title','Cited by','Year']]
119
+ viz=vis.groupby(['doctype', 'source', 'year'])['citby'].agg(['sum','count']).reset_index()
120
+ viz.rename(columns={'sum': 'cited by', 'count': 'total docs'}, inplace=True)
121
+
122
+ fig = px.sunburst(viz, path=['doctype', 'source', 'year'], values='total docs',
123
+ color='cited by',
124
+ color_continuous_scale='RdBu',
125
+ color_continuous_midpoint=np.average(viz['cited by'], weights=viz['total docs']))
126
+ fig.update_layout(height=800, width=1200)
127
+ return fig
128
+
129
+ years, papers = listyear(extype)
130
+
131
+ if {'Document Type','Source title','Cited by','Year'}.issubset(papers.columns):
132
+ fig = vis_sunbrust(extype)
133
+ st.plotly_chart(fig, height=800, width=1200) #use_container_width=True)
134
+
135
+ else:
136
+ st.error('We require these columns: Document Type, Source title, Cited by, Year', icon="🚨")
137
+
138
+ with tab2:
139
+ st.markdown('**numpy.average — NumPy v1.24 Manual. (n.d.). Numpy.Average — NumPy v1.24 Manual.** https://numpy.org/doc/stable/reference/generated/numpy.average.html')
140
+ st.markdown('**Sunburst. (n.d.). Sunburst Charts in Python.** https://plotly.com/python/sunburst-charts/')
141
 
142
+ except:
143
+ st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
144
+ st.stop()