Spaces:
Runtime error
Runtime error
make sure the options are author and title columns, modularized comparison 1 and 2
Browse files
app.py
CHANGED
@@ -12,115 +12,118 @@ nlp = spacy.load("en_core_web_md")
|
|
12 |
# Scopus file loading
|
13 |
st.title("Scattertext Analysis")
|
14 |
st.header("Put your file here... ")
|
15 |
-
uploaded_file = st.file_uploader("Choose a file", type=["csv", "txt"])
|
16 |
-
if uploaded_file is not None:
|
17 |
-
# determine file type
|
18 |
-
if uploaded_file.name.endswith(".csv"):
|
19 |
-
df = pd.read_csv(uploaded_file)
|
20 |
-
# preview the uploaded file
|
21 |
-
elif uploaded_file.name.endswith(".txt"):
|
22 |
-
df = pd.read_table(uploaded_file, sep='\t') # Doc: assume contents are seperated by Tabs.
|
23 |
-
# preview the uploaded file
|
24 |
-
else:
|
25 |
-
st.error("Unsupported file format.")
|
26 |
-
|
27 |
-
|
28 |
-
# layout row1
|
29 |
-
row1_col1, row1_col2 = st.columns(2)
|
30 |
-
|
31 |
-
choose_column = ('Abstract', 'Source Title')
|
32 |
-
with row1_col1:
|
33 |
-
choice = st.selectbox("Choose column to analyze", choose_column)
|
34 |
-
|
35 |
-
comparison_options = ('Sources', 'Years')
|
36 |
-
|
37 |
-
with row1_col2:
|
38 |
-
type_of_comparison = st.selectbox("Type of comparison", comparison_options)
|
39 |
-
|
40 |
-
|
41 |
-
if choose_column == 'Abstract':
|
42 |
-
|
43 |
-
# type_of_comparison 1
|
44 |
-
if type_of_comparison == "Sources":
|
45 |
-
row2_col1, row2_col2 = st.columns(2)
|
46 |
-
with row2_col1:
|
47 |
-
first_source = st.selectbox("Choose First Source", df['Source title'].unique(), key='first_source_select')
|
48 |
-
with row2_col2:
|
49 |
-
second_source = st.selectbox("Choose Second Source", df['Source title'].unique(),
|
50 |
-
key='second_source_select')
|
51 |
-
|
52 |
-
# filter data
|
53 |
-
first_data = df[df['Source title'] == first_source].copy()
|
54 |
-
second_data = df[df['Source title'] == second_source].copy()
|
55 |
-
filtered_data = pd.concat([first_data, second_data])
|
56 |
-
st.write(filtered_data)
|
57 |
-
|
58 |
-
if st.button("Generate the Scattertext Plot"):
|
59 |
-
# make plot
|
60 |
-
corpus = sct.CorpusFromPandas(
|
61 |
-
filtered_data,
|
62 |
-
category_col="Source title",
|
63 |
-
text_col='Abstract',
|
64 |
-
nlp=nlp,
|
65 |
-
).build()
|
66 |
-
# generate HTML visualization
|
67 |
-
html = sct.produce_scattertext_explorer(corpus,
|
68 |
-
category=first_source,
|
69 |
-
category_name=first_source,
|
70 |
-
not_category_name=second_source,
|
71 |
-
width_in_pixels=900,
|
72 |
-
minimum_term_frequency=0,
|
73 |
-
metadata=filtered_data)
|
74 |
-
st.components.v1.html(html, width=1000, height=600)
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
|
78 |
# type_of_comparison 2
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
|
125 |
|
126 |
|
|
|
12 |
# Scopus file loading
|
13 |
st.title("Scattertext Analysis")
|
14 |
st.header("Put your file here... ")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
+
def compatison1(selected_column: str) -> None:
    """Compare the text of two sources with a Scattertext plot.

    Renders two select boxes filled with the unique values of the
    ``'Source title'`` column of the module-level ``df``, shows the rows that
    belong to the two chosen sources and, once the button is pressed, builds a
    Scattertext corpus from ``selected_column`` and embeds the generated HTML
    explorer in the page.

    Args:
        selected_column: Name of the column of ``df`` holding the text to
            analyze.
    """
    source_col = 'Source title'
    if source_col not in df.columns:
        # Avoid a raw KeyError when the uploaded file uses other column names.
        st.error(f"Column '{source_col}' was not found in the uploaded file.")
        return

    sources = df[source_col].unique()

    # layout row2: one selector per source
    row2_col1, row2_col2 = st.columns(2)
    with row2_col1:
        first_source = st.selectbox("Choose First Source", sources,
                                    key='first_source_select')
    with row2_col2:
        second_source = st.selectbox("Choose Second Source", sources,
                                     key='second_source_select')

    # filter data
    first_data = df[df[source_col] == first_source].copy()
    second_data = df[df[source_col] == second_source].copy()
    filtered_data = pd.concat([first_data, second_data])
    st.write(filtered_data)

    if st.button("Generate the Scattertext Plot"):
        # Rows without text would reach the nlp pipeline as NaN instead of a
        # string, so they are left out of the corpus.
        plot_data = filtered_data.dropna(subset=[selected_column])

        # A comparison needs documents in two distinct categories; this also
        # covers the case where the same source was picked twice.
        if plot_data[source_col].nunique() < 2:
            st.warning("Choose two different sources that both contain text "
                       "in the selected column.")
            return

        # make plot
        corpus = sct.CorpusFromPandas(
            plot_data,
            category_col=source_col,
            text_col=selected_column,
            nlp=nlp,
        ).build()

        # generate HTML visualization
        # NOTE(review): the whole dataframe is passed as `metadata`, as in the
        # original code - confirm this is what scattertext expects.
        html = sct.produce_scattertext_explorer(corpus,
                                                category=first_source,
                                                category_name=first_source,
                                                not_category_name=second_source,
                                                width_in_pixels=900,
                                                minimum_term_frequency=0,
                                                metadata=plot_data)
        st.components.v1.html(html, width=1000, height=600)
|
49 |
|
50 |
|
51 |
# type_of_comparison 2
|
52 |
+
def comparison2(selected_column: str) -> None:
    """Compare the text of two year ranges with a Scattertext plot.

    Converts the ``'Year'`` column of the module-level ``df`` to integers
    (rows whose year cannot be parsed are discarded), renders one range slider
    per period, labels the rows of each period in a ``'Topic Range'`` column,
    shows the combined rows and, once the button is pressed, builds a
    Scattertext corpus from ``selected_column`` and embeds the generated HTML
    explorer in the page.

    Args:
        selected_column: Name of the column of ``df`` holding the text to
            analyze.
    """
    year_col = 'Year'
    if year_col not in df.columns:
        # Avoid a raw KeyError when the uploaded file uses other column names.
        st.error(f"Column '{year_col}' was not found in the uploaded file.")
        return

    # Clean the years on a copy so the shared dataframe is not modified.
    data = df.copy()
    data[year_col] = pd.to_numeric(data[year_col], errors='coerce')
    data = data.dropna(subset=[year_col])
    if data.empty:
        # min()/max() of an empty column are NaN and cannot be cast to int.
        st.error(f"No valid numeric values were found in the '{year_col}' column.")
        return
    data[year_col] = data[year_col].astype(int)

    min_year = int(data[year_col].min())
    max_year = int(data[year_col].max())

    # layout row2: one range slider per period
    row2_col1, row2_col2 = st.columns(2)
    with row2_col1:
        first_range = st.slider("First range", min_value=min_year,
                                max_value=max_year, step=1,
                                value=(min_year, max_year))
    with row2_col2:
        second_range = st.slider("Second range", min_value=min_year,
                                 max_value=max_year, step=1,
                                 value=(min_year, max_year))

    # filter data
    in_first = (data[year_col] >= first_range[0]) & (data[year_col] <= first_range[1])
    first_range_filter_df = data[in_first].copy()
    first_range_filter_df['Topic Range'] = 'First range'

    in_second = (data[year_col] >= second_range[0]) & (data[year_col] <= second_range[1])
    second_range_filter_df = data[in_second].copy()
    second_range_filter_df['Topic Range'] = 'Second range'

    filtered_df = pd.concat([first_range_filter_df, second_range_filter_df])
    st.write(filtered_df)

    if st.button("Generate the Scattertext Plot"):
        # Rows without text would reach the nlp pipeline as NaN instead of a
        # string, so they are left out of the corpus.
        plot_df = filtered_df.dropna(subset=[selected_column])

        # A comparison needs documents in both ranges.
        if plot_df['Topic Range'].nunique() < 2:
            st.warning("Both year ranges must contain text in the selected "
                       "column.")
            return

        # make plot
        corpus = sct.CorpusFromPandas(
            plot_df,
            category_col="Topic Range",
            text_col=selected_column,
            nlp=nlp,
        ).build()

        # generate HTML visualization
        # NOTE(review): the whole dataframe is passed as `metadata`, as in the
        # original code - confirm this is what scattertext expects.
        html = sct.produce_scattertext_explorer(corpus,
                                                category='First range',
                                                category_name='First range',
                                                not_category_name='Second range',
                                                width_in_pixels=900,
                                                minimum_term_frequency=0,
                                                metadata=plot_df)
        st.components.v1.html(html, width=1000, height=600)
|
94 |
+
|
95 |
+
|
96 |
+
if __name__ == '__main__':
    # Script entry point: load the uploaded file, let the user pick the text
    # column and the kind of comparison, then delegate to the matching
    # comparison function.
    uploaded_file = st.file_uploader("Choose a file", type=["csv", "txt"])
    if uploaded_file is not None:
        # determine file type (case-insensitive on the extension)
        file_name = uploaded_file.name.lower()
        if file_name.endswith(".csv"):
            df = pd.read_csv(uploaded_file)
            abstract_col = 'Abstract'
            title_col = 'Title'
        elif file_name.endswith(".txt"):
            # Doc: assume contents are separated by tabs.
            df = pd.read_table(uploaded_file, sep='\t')
            abstract_col = 'AB'
            title_col = 'TI'
        else:
            st.error("Unsupported file format.")
            # Nothing below can work without a dataframe and column names.
            st.stop()

        column_choices = (abstract_col, title_col)

        # layout row1
        row1_col1, row1_col2 = st.columns(2)
        with row1_col1:
            choice = st.selectbox("Choose column to analyze", column_choices)

        comparison_options = ('Sources', 'Years')
        with row1_col2:
            type_of_comparison = st.selectbox("Type of comparison", comparison_options)

        # Pass the single column the user selected, not the tuple of options.
        if type_of_comparison == 'Sources':
            compatison1(choice)
        elif type_of_comparison == 'Years':
            comparison2(choice)
|
127 |
|
128 |
|
129 |
|