Oliviayc committed on
Commit
ab928f5
·
1 Parent(s): 50c295e

make sure the options are author and title columns, modularized comparison 1 and 2

Browse files
Files changed (1) hide show
  1. app.py +108 -105
app.py CHANGED
@@ -12,115 +12,118 @@ nlp = spacy.load("en_core_web_md")
12
  # Scopus file loading
13
  st.title("Scattertext Analysis")
14
  st.header("Put your file here... ")
15
- uploaded_file = st.file_uploader("Choose a file", type=["csv", "txt"])
16
- if uploaded_file is not None:
17
- # determine file type
18
- if uploaded_file.name.endswith(".csv"):
19
- df = pd.read_csv(uploaded_file)
20
- # preview the uploaded file
21
- elif uploaded_file.name.endswith(".txt"):
22
- df = pd.read_table(uploaded_file, sep='\t') # Doc: assume contents are seperated by Tabs.
23
- # preview the uploaded file
24
- else:
25
- st.error("Unsupported file format.")
26
-
27
-
28
- # layout row1
29
- row1_col1, row1_col2 = st.columns(2)
30
-
31
- choose_column = ('Abstract', 'Source Title')
32
- with row1_col1:
33
- choice = st.selectbox("Choose column to analyze", choose_column)
34
-
35
- comparison_options = ('Sources', 'Years')
36
-
37
- with row1_col2:
38
- type_of_comparison = st.selectbox("Type of comparison", comparison_options)
39
-
40
-
41
- if choose_column == 'Abstract':
42
-
43
- # type_of_comparison 1
44
- if type_of_comparison == "Sources":
45
- row2_col1, row2_col2 = st.columns(2)
46
- with row2_col1:
47
- first_source = st.selectbox("Choose First Source", df['Source title'].unique(), key='first_source_select')
48
- with row2_col2:
49
- second_source = st.selectbox("Choose Second Source", df['Source title'].unique(),
50
- key='second_source_select')
51
-
52
- # filter data
53
- first_data = df[df['Source title'] == first_source].copy()
54
- second_data = df[df['Source title'] == second_source].copy()
55
- filtered_data = pd.concat([first_data, second_data])
56
- st.write(filtered_data)
57
-
58
- if st.button("Generate the Scattertext Plot"):
59
- # make plot
60
- corpus = sct.CorpusFromPandas(
61
- filtered_data,
62
- category_col="Source title",
63
- text_col='Abstract',
64
- nlp=nlp,
65
- ).build()
66
- # generate HTML visualization
67
- html = sct.produce_scattertext_explorer(corpus,
68
- category=first_source,
69
- category_name=first_source,
70
- not_category_name=second_source,
71
- width_in_pixels=900,
72
- minimum_term_frequency=0,
73
- metadata=filtered_data)
74
- st.components.v1.html(html, width=1000, height=600)
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
 
78
  # type_of_comparison 2
79
- if type_of_comparison == "Years":
80
- df['Year'] = pd.to_numeric(df['Year'], errors='coerce')
81
- df.dropna(subset=['Year'], inplace=True)
82
- df['Year'] = df['Year'].astype(int)
83
-
84
- min_year = int(df['Year'].min())
85
- max_year = int(df['Year'].max())
86
- # layout row2
87
- row2_col1, row2_col2 = st.columns(2)
88
- with row2_col1:
89
- first_range = st.slider("First range", min_value = min_year, max_value= max_year, step = 1, value= (min_year, max_year))
90
- with row2_col2:
91
- second_range = st.slider("Second range", min_value = min_year, max_value= max_year, step = 1, value= (min_year, max_year))
92
-
93
- # filter data
94
- first_range_filter_df = df[(df['Year'] >= first_range[0]) & (df['Year'] <= first_range[1])].copy()
95
- first_range_filter_df['Topic Range'] = 'First range'
96
-
97
- second_range_filter_df = df[(df['Year'] >= second_range[0]) & (df['Year'] <= second_range[1])].copy()
98
- second_range_filter_df['Topic Range'] = 'Second range'
99
-
100
- filtered_df = pd.concat([first_range_filter_df, second_range_filter_df])
101
- st.write(filtered_df)
102
-
103
- if st.button("Generate the Scattertext Plot"):
104
- # make plot
105
- corpus = sct.CorpusFromPandas(
106
- filtered_df,
107
- category_col="Topic Range",
108
- text_col='Abstract',
109
- nlp=nlp,
110
- ).build()
111
- # generate HTML visualization
112
- html = sct.produce_scattertext_explorer(corpus,
113
- category='First range',
114
- category_name='First range',
115
- not_category_name='Second range',
116
- width_in_pixels=900,
117
- minimum_term_frequency=0,
118
- metadata=filtered_df)
119
- st.components.v1.html(html, width=1000, height=600)
120
-
121
-
122
-
123
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
 
126
 
 
12
  # Scopus file loading
13
  st.title("Scattertext Analysis")
14
  st.header("Put your file here... ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
def compatison1(selected_column, source_col='Source title'):
    """Compare two sources (comparison type 1) and render a Scattertext plot.

    Draws two select boxes listing the values of ``source_col`` found in the
    module-level ``df``, shows the rows that belong to the two chosen sources
    and, once the button is pressed, builds a Scattertext corpus from those
    rows and embeds the generated HTML explorer in the page.

    Args:
        selected_column: Name of the text column in ``df`` to analyze.
        source_col: Name of the column holding the source of each row.
            Defaults to ``'Source title'``, the header this function
            previously hard-coded.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Report missing columns on the page instead of failing with a KeyError.
    missing = [col for col in (source_col, selected_column) if col not in df.columns]
    if missing:
        st.error("Column(s) not found in the uploaded file: "
                 + ", ".join(str(col) for col in missing))
        return

    # Rows without a source cannot be selected meaningfully, so skip them.
    sources = df[source_col].dropna().unique()

    left, right = st.columns(2)
    with left:
        first_source = st.selectbox("Choose First Source", sources,
                                    key='first_source_select')
    with right:
        second_source = st.selectbox("Choose Second Source", sources,
                                     key='second_source_select')

    # Keep only the rows of the two chosen sources, first source on top.
    first_data = df[df[source_col] == first_source]
    second_data = df[df[source_col] == second_source]
    filtered_data = pd.concat([first_data, second_data])
    st.write(filtered_data)

    # Rows with no text are dropped before parsing.
    # NOTE(review): spaCy is expected to reject non-string input - confirm.
    plot_data = filtered_data.dropna(subset=[selected_column])

    # The explorer contrasts two categories; choosing the same source twice
    # (or a source with no usable text) leaves fewer than two.
    if plot_data[source_col].nunique() < 2:
        st.warning("Choose two different sources that each have text in the "
                   "selected column.")
        return

    if st.button("Generate the Scattertext Plot"):
        corpus = sct.CorpusFromPandas(
            plot_data,
            category_col=source_col,
            text_col=selected_column,
            nlp=nlp,
        ).build()
        # NOTE(review): the whole frame is passed as `metadata`, as before;
        # confirm this is the per-document metadata scattertext expects.
        html = sct.produce_scattertext_explorer(
            corpus,
            category=first_source,
            category_name=first_source,
            not_category_name=second_source,
            width_in_pixels=900,
            minimum_term_frequency=0,
            metadata=plot_data,
        )
        st.components.v1.html(html, width=1000, height=600)
49
 
50
 
51
  # type_of_comparison 2
52
def comparison2(selected_column, year_col='Year'):
    """Compare two year ranges (comparison type 2) and render a Scattertext plot.

    Coerces ``year_col`` of the module-level ``df`` to integers (rows whose
    year is not numeric are discarded), lets the user pick two inclusive year
    ranges, labels the matching rows ``'First range'`` / ``'Second range'`` in
    a ``'Topic Range'`` column, shows the combined rows and, once the button
    is pressed, builds a Scattertext corpus and embeds the HTML explorer.

    Args:
        selected_column: Name of the text column in ``df`` to analyze.
        year_col: Name of the column holding the publication year. Defaults
            to ``'Year'``, the header this function previously hard-coded.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Report missing columns on the page instead of failing with a KeyError.
    missing = [col for col in (year_col, selected_column) if col not in df.columns]
    if missing:
        st.error("Column(s) not found in the uploaded file: "
                 + ", ".join(str(col) for col in missing))
        return

    # Work on a copy so the uploaded frame is not modified as a side effect.
    data = df.copy()
    data[year_col] = pd.to_numeric(data[year_col], errors='coerce')
    data = data.dropna(subset=[year_col])
    if data.empty:
        # Without this guard int(NaN) below would raise.
        st.error("No rows with a numeric year were found in the uploaded file.")
        return
    data[year_col] = data[year_col].astype(int)

    min_year = int(data[year_col].min())
    max_year = int(data[year_col].max())

    if min_year < max_year:
        left, right = st.columns(2)
        with left:
            first_range = st.slider("First range", min_value=min_year,
                                    max_value=max_year, step=1,
                                    value=(min_year, max_year))
        with right:
            second_range = st.slider("Second range", min_value=min_year,
                                     max_value=max_year, step=1,
                                     value=(min_year, max_year))
    else:
        # NOTE(review): st.slider is believed to require min_value below
        # max_value - confirm. A single-year dataset skips the sliders.
        st.info(f"All rows are from {min_year}; both ranges cover that year.")
        first_range = second_range = (min_year, max_year)

    def rows_in(year_range, label):
        """Return the rows whose year lies in the inclusive range, labelled."""
        low, high = year_range
        subset = data[(data[year_col] >= low) & (data[year_col] <= high)].copy()
        subset['Topic Range'] = label
        return subset

    # A row that falls in both ranges appears once under each label.
    filtered_df = pd.concat([
        rows_in(first_range, 'First range'),
        rows_in(second_range, 'Second range'),
    ])
    st.write(filtered_df)

    # Rows with no text are dropped before parsing.
    # NOTE(review): spaCy is expected to reject non-string input - confirm.
    plot_df = filtered_df.dropna(subset=[selected_column])
    if plot_df['Topic Range'].nunique() < 2:
        st.warning("Both ranges need at least one row with text in the "
                   "selected column.")
        return

    if st.button("Generate the Scattertext Plot"):
        corpus = sct.CorpusFromPandas(
            plot_df,
            category_col="Topic Range",
            text_col=selected_column,
            nlp=nlp,
        ).build()
        # NOTE(review): the whole frame is passed as `metadata`, as before;
        # confirm this is the per-document metadata scattertext expects.
        html = sct.produce_scattertext_explorer(
            corpus,
            category='First range',
            category_name='First range',
            not_category_name='Second range',
            width_in_pixels=900,
            minimum_term_frequency=0,
            metadata=plot_df,
        )
        st.components.v1.html(html, width=1000, height=600)
94
+
95
+
96
# Page flow: upload a file, pick the text column and the comparison type,
# then hand over to the matching comparison function.
if __name__ == '__main__':
    uploaded_file = st.file_uploader("Choose a file", type=["csv", "txt"])
    if uploaded_file is not None:
        # Compare the extension case-insensitively so "DATA.CSV" is accepted.
        file_name = uploaded_file.name.lower()
        if file_name.endswith(".csv"):
            df = pd.read_csv(uploaded_file)
            abstract_col = 'Abstract'
            title_col = 'Title'
        elif file_name.endswith(".txt"):
            # Doc: assume contents are separated by tabs.
            df = pd.read_table(uploaded_file, sep='\t')
            abstract_col = 'AB'
            title_col = 'TI'
        else:
            st.error("Unsupported file format.")
            # Nothing was loaded; end this run instead of continuing into a
            # NameError on the undefined column names.
            st.stop()

        column_choices = (abstract_col, title_col)

        # layout row1
        row1_col1, row1_col2 = st.columns(2)
        with row1_col1:
            choice = st.selectbox("Choose column to analyze", column_choices)

        comparison_options = ('Sources', 'Years')
        with row1_col2:
            type_of_comparison = st.selectbox("Type of comparison", comparison_options)

        # Pass the column the user picked, not the tuple of all options.
        if type_of_comparison == 'Sources':
            compatison1(choice)
        elif type_of_comparison == 'Years':
            comparison2(choice)
127
 
128
 
129