Spaces:
Running
Running
Update pages/4 Sunburst.py
Browse files- pages/4 Sunburst.py +68 -63
pages/4 Sunburst.py
CHANGED
@@ -37,7 +37,7 @@ st.subheader('Put your file here...', anchor=False)
|
|
37 |
|
38 |
#===clear cache===
|
39 |
def reset_all():
|
40 |
-
|
41 |
|
42 |
#===check type===
|
43 |
@st.cache_data(ttl=3600)
|
@@ -50,8 +50,8 @@ def upload(extype):
|
|
50 |
papers = pd.read_csv(uploaded_file)
|
51 |
#lens.org
|
52 |
if 'Publication Year' in papers.columns:
|
53 |
-
|
54 |
-
|
55 |
return papers
|
56 |
|
57 |
@st.cache_data(ttl=3600)
|
@@ -72,68 +72,73 @@ def conv_txt(extype):
|
|
72 |
uploaded_file = st.file_uploader('', type=['csv', 'txt'], on_change=reset_all)
|
73 |
|
74 |
if uploaded_file is not None:
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
@st.cache_data(ttl=3600)
|
83 |
-
def get_minmax(extype):
|
84 |
-
extype = extype
|
85 |
-
MIN = int(papers['Year'].min())
|
86 |
-
MAX = int(papers['Year'].max())
|
87 |
-
GAP = MAX - MIN
|
88 |
-
return papers, MIN, MAX, GAP
|
89 |
-
|
90 |
-
tab1, tab2 = st.tabs(["📈 Generate visualization", "📓 Recommended Reading"])
|
91 |
-
|
92 |
-
with tab1:
|
93 |
-
#===sunburst===
|
94 |
-
try:
|
95 |
-
papers, MIN, MAX, GAP = get_minmax(extype)
|
96 |
-
except KeyError:
|
97 |
-
st.error('Error: Please check again your columns.')
|
98 |
-
sys.exit(1)
|
99 |
-
|
100 |
-
if (GAP != 0):
|
101 |
-
YEAR = st.slider('Year', min_value=MIN, max_value=MAX, value=(MIN, MAX), on_change=reset_all)
|
102 |
-
else:
|
103 |
-
st.write('You only have data in ', (MAX))
|
104 |
-
YEAR = (MIN, MAX)
|
105 |
|
106 |
@st.cache_data(ttl=3600)
|
107 |
-
def
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
112 |
|
113 |
-
|
114 |
-
def vis_sunbrust(extype):
|
115 |
-
papers['Cited by'] = papers['Cited by'].fillna(0)
|
116 |
-
vis = pd.DataFrame()
|
117 |
-
vis[['doctype','source','citby','year']] = papers[['Document Type','Source title','Cited by','Year']]
|
118 |
-
viz=vis.groupby(['doctype', 'source', 'year'])['citby'].agg(['sum','count']).reset_index()
|
119 |
-
viz.rename(columns={'sum': 'cited by', 'count': 'total docs'}, inplace=True)
|
120 |
-
|
121 |
-
fig = px.sunburst(viz, path=['doctype', 'source', 'year'], values='total docs',
|
122 |
-
color='cited by',
|
123 |
-
color_continuous_scale='RdBu',
|
124 |
-
color_continuous_midpoint=np.average(viz['cited by'], weights=viz['total docs']))
|
125 |
-
fig.update_layout(height=800, width=1200)
|
126 |
-
return fig
|
127 |
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
-
|
138 |
-
st.
|
139 |
-
st.
|
|
|
37 |
|
38 |
#===clear cache===
|
39 |
def reset_all():
|
40 |
+
st.cache_data.clear()
|
41 |
|
42 |
#===check type===
|
43 |
@st.cache_data(ttl=3600)
|
|
|
50 |
papers = pd.read_csv(uploaded_file)
|
51 |
#lens.org
|
52 |
if 'Publication Year' in papers.columns:
|
53 |
+
papers.rename(columns={'Publication Year': 'Year', 'Citing Works Count': 'Cited by',
|
54 |
+
'Publication Type': 'Document Type', 'Source Title': 'Source title'}, inplace=True)
|
55 |
return papers
|
56 |
|
57 |
@st.cache_data(ttl=3600)
|
|
|
72 |
uploaded_file = st.file_uploader('', type=['csv', 'txt'], on_change=reset_all)
|
73 |
|
74 |
if uploaded_file is not None:
|
75 |
+
try:
|
76 |
+
extype = get_ext(uploaded_file)
|
77 |
+
if extype.endswith('.csv'):
|
78 |
+
papers = upload(extype)
|
79 |
+
|
80 |
+
elif extype.endswith('.txt'):
|
81 |
+
papers = conv_txt(extype)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
@st.cache_data(ttl=3600)
|
84 |
+
def get_minmax(extype):
|
85 |
+
extype = extype
|
86 |
+
MIN = int(papers['Year'].min())
|
87 |
+
MAX = int(papers['Year'].max())
|
88 |
+
GAP = MAX - MIN
|
89 |
+
return papers, MIN, MAX, GAP
|
90 |
|
91 |
+
tab1, tab2 = st.tabs(["📈 Generate visualization", "📓 Recommended Reading"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
+
with tab1:
|
94 |
+
#===sunburst===
|
95 |
+
try:
|
96 |
+
papers, MIN, MAX, GAP = get_minmax(extype)
|
97 |
+
except KeyError:
|
98 |
+
st.error('Error: Please check again your columns.')
|
99 |
+
sys.exit(1)
|
100 |
+
|
101 |
+
if (GAP != 0):
|
102 |
+
YEAR = st.slider('Year', min_value=MIN, max_value=MAX, value=(MIN, MAX), on_change=reset_all)
|
103 |
+
else:
|
104 |
+
st.write('You only have data in ', (MAX))
|
105 |
+
YEAR = (MIN, MAX)
|
106 |
+
|
107 |
+
@st.cache_data(ttl=3600)
|
108 |
+
def listyear(extype):
|
109 |
+
global papers
|
110 |
+
years = list(range(YEAR[0],YEAR[1]+1))
|
111 |
+
papers = papers.loc[papers['Year'].isin(years)]
|
112 |
+
return years, papers
|
113 |
+
|
114 |
+
@st.cache_data(ttl=3600)
|
115 |
+
def vis_sunbrust(extype):
|
116 |
+
papers['Cited by'] = papers['Cited by'].fillna(0)
|
117 |
+
vis = pd.DataFrame()
|
118 |
+
vis[['doctype','source','citby','year']] = papers[['Document Type','Source title','Cited by','Year']]
|
119 |
+
viz=vis.groupby(['doctype', 'source', 'year'])['citby'].agg(['sum','count']).reset_index()
|
120 |
+
viz.rename(columns={'sum': 'cited by', 'count': 'total docs'}, inplace=True)
|
121 |
+
|
122 |
+
fig = px.sunburst(viz, path=['doctype', 'source', 'year'], values='total docs',
|
123 |
+
color='cited by',
|
124 |
+
color_continuous_scale='RdBu',
|
125 |
+
color_continuous_midpoint=np.average(viz['cited by'], weights=viz['total docs']))
|
126 |
+
fig.update_layout(height=800, width=1200)
|
127 |
+
return fig
|
128 |
+
|
129 |
+
years, papers = listyear(extype)
|
130 |
+
|
131 |
+
if {'Document Type','Source title','Cited by','Year'}.issubset(papers.columns):
|
132 |
+
fig = vis_sunbrust(extype)
|
133 |
+
st.plotly_chart(fig, height=800, width=1200) #use_container_width=True)
|
134 |
+
|
135 |
+
else:
|
136 |
+
st.error('We require these columns: Document Type, Source title, Cited by, Year', icon="🚨")
|
137 |
+
|
138 |
+
with tab2:
|
139 |
+
st.markdown('**numpy.average — NumPy v1.24 Manual. (n.d.). Numpy.Average — NumPy v1.24 Manual.** https://numpy.org/doc/stable/reference/generated/numpy.average.html')
|
140 |
+
st.markdown('**Sunburst. (n.d.). Sunburst Charts in Python.** https://plotly.com/python/sunburst-charts/')
|
141 |
|
142 |
+
except:
|
143 |
+
st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
|
144 |
+
st.stop()
|