Oliviayc committed on
Commit
95f5cd3
·
verified ·
1 Parent(s): 2cb8bbb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -0
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import scattertext as stx
3
+ import spacy
4
+ import pandas as pd
5
+ import en_core_web_md
6
+
7
+
8
+
9
# Column holding the abstract in each supported database export, keyed by the
# file extension: Scopus / Lens CSV exports use "Abstract", Web of Science
# tab-delimited exports use "AB".
ABSTRACT_COLUMNS = {".csv": "Abstract", ".txt": "AB"}

# Height (in pixels) of the embedded plot; the component default is far too
# small for a 1000-pixel-wide scattertext explorer.
PLOT_HEIGHT = 800


@st.cache_resource
def load_language_model():
    """Return the spaCy ``en_core_web_md`` pipeline, loaded once and cached.

    Streamlit re-executes this script on every widget interaction, so the
    model is cached instead of being reloaded on each rerun.
    """
    return en_core_web_md.load()


def detect_extension(uploaded_file):
    """Return ".csv" or ".txt" for a supported upload, otherwise ``None``."""
    lowered_name = uploaded_file.name.lower()
    for extension in ABSTRACT_COLUMNS:
        if lowered_name.endswith(extension):
            return extension
    return None


def read_uploaded_table(uploaded_file, extension):
    """Parse the uploaded file into a DataFrame.

    CSV files are read with ``pd.read_csv``; TXT files are assumed to be
    tab-separated. Returns ``None`` (after reporting the problem in the UI)
    when the file cannot be parsed.
    """
    # Rewind first: the script is rerun on each interaction and the same
    # upload may already have been read to the end.
    uploaded_file.seek(0)
    try:
        if extension == ".csv":
            return pd.read_csv(uploaded_file)
        return pd.read_table(uploaded_file, sep="\t")
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as err:
        st.error(f"Could not read the uploaded file: {err}")
        return None


def render_scattertext(df, text_col, category_col, key_prefix):
    """Collect plot options and render a scattertext explorer.

    Parameters:
        df: the uploaded table.
        text_col: name of the column containing the documents to analyse.
        category_col: name of the column whose values label each document.
        key_prefix: prefix for widget keys so that the two analysis modes do
            not share widget state.
    """
    # Keep only usable rows: both values present and the text non-blank.
    data = df[[text_col, category_col]].dropna().astype(str)
    data = data[data[text_col].str.strip() != ""].reset_index(drop=True)

    categories = sorted(data[category_col].unique())
    if len(categories) < 2:
        st.warning(
            "The chosen category column needs at least two distinct values "
            "to compare a category against the rest."
        )
        return

    # scattertext contrasts ONE category value against all other documents,
    # so the user has to pick a value, not just a column.
    target_category = st.selectbox(
        "Choose the category to compare against all others",
        categories,
        key=f"{key_prefix}_target",
    )
    customize_category_name = st.text_input(
        "Enter the category name",
        target_category,
        key=f"{key_prefix}_category_name",
    )
    customize_non_category_name = st.text_input(
        "Enter the non-category name",
        f"Not {target_category}",
        key=f"{key_prefix}_non_category_name",
    )

    # All option widgets live OUTSIDE the button branch: a button is only
    # "pressed" for a single rerun, so widgets nested under it would vanish
    # on the next interaction.
    if not st.button("Generate plot", key=f"{key_prefix}_generate"):
        return

    with st.spinner("Building the corpus and plot..."):
        # convert to scattertext corpus
        corpus = stx.CorpusFromPandas(
            data,
            category_col=category_col,
            text_col=text_col,
            nlp=load_language_model(),
        ).build()
        # create visualization
        html = stx.produce_scattertext_explorer(
            corpus,
            category=target_category,
            category_name=customize_category_name or target_category,
            not_category_name=customize_non_category_name or f"Not {target_category}",
            width_in_pixels=1000,
            minimum_term_frequency=0,
            # One label per document (here its category value).
            # NOTE(review): confirm this is the per-document header wanted.
            metadata=data[category_col].tolist(),
        )
    st.components.v1.html(html, height=PLOT_HEIGHT, scrolling=True)


# config
st.title("Scattertext Analysis")
# TODO: update other web settings

# upload file
uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])

if uploaded_file is None:
    st.write("Please upload a CSV or TXT file to begin.")
else:
    # choose function
    function_choice = st.selectbox(
        'Choose file source',
        ['Choose...', 'Customized', 'Download from Online Databases'],
    )

    extension = detect_extension(uploaded_file)
    if extension is None:
        st.error("Unsupported file format.")
    elif function_choice != 'Choose...':
        # read data
        # TODO doc: TXT contents are assumed to be separated by tabs.
        df = read_uploaded_table(uploaded_file, extension)

        if df is None:
            pass  # the parse error has already been reported
        elif function_choice == 'Customized':
            # function1: generate plot from customized file
            chosen_column = st.selectbox("Choose text column for analysis", df.columns)
            category_options = [col for col in df.columns if col != chosen_column]
            if not category_options:
                st.error("The file needs a text column and a category column.")
            else:
                category_column = st.selectbox("Choose category column", category_options)
                render_scattertext(df, chosen_column, category_column, "custom")
        else:
            # function2: generate plot from databases
            # TODO doc: explain that this mode analyses the abstract column.
            abstract_column = ABSTRACT_COLUMNS[extension]
            if abstract_column not in df.columns:
                st.error(
                    f"Expected an abstract column named '{abstract_column}' "
                    "in this export."
                )
            else:
                # The abstract is the text itself, so it is not offered as a
                # category column.
                category_options = [col for col in df.columns if col != abstract_column]
                if not category_options:
                    st.error("The file needs a category column besides the abstract.")
                else:
                    chosen_column = st.selectbox(
                        "Choose category column (any column BESIDES ABSTRACT)",
                        category_options,
                    )
                    render_scattertext(df, abstract_column, chosen_column, "database")