Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import scattertext as stx
|
3 |
+
import spacy
|
4 |
+
import pandas as pd
|
5 |
+
import en_core_web_md
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
# Load the spaCy language model once. The original loaded the same model twice
# (en_core_web_md.load() followed by spacy.load("en_core_web_md")), which only
# doubled the start-up cost.
nlp = en_core_web_md.load()

# Name of the abstract column in each supported database export.
SCOPUS_LENS_ABSTRACT_COLUMN = "Abstract"  # Scopus / Lens exports (.csv)
WEB_OF_SCIENCE_ABSTRACT_COLUMN = "AB"  # Web of Science exports (.txt, tab-separated)

SOURCE_PLACEHOLDER = 'Choose...'
SOURCE_CUSTOMIZED = 'Customized'
SOURCE_DATABASE = 'Download from Online Databases'


def _load_table(uploaded):
    """Read the uploaded file into a DataFrame.

    ``.csv`` files are parsed as comma-separated and ``.txt`` files as
    tab-separated. Returns ``None`` when the extension is not supported or
    the file cannot be parsed; in the latter case an error is shown to the
    user.
    """
    file_name = uploaded.name.lower()
    try:
        if file_name.endswith(".csv"):
            return pd.read_csv(uploaded)
        if file_name.endswith(".txt"):
            # .txt uploads are assumed to be tab-separated tables.
            return pd.read_table(uploaded, sep='\t')
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
        st.error(f"Could not read the uploaded file: {err}")
        return None
    st.error("Unsupported file format.")
    return None


def _render_explorer(df, category_col, text_col):
    """Collect plot options from the user and render the Scattertext explorer.

    Args:
        df: table holding the documents.
        category_col: NAME of the column that labels each document's category.
        text_col: NAME of the column that holds each document's text.
    """
    # Rows without text or without a category cannot be parsed or plotted.
    data = df.dropna(subset=[category_col, text_col]).copy()
    data[category_col] = data[category_col].astype(str)
    data[text_col] = data[text_col].astype(str)

    categories = sorted(data[category_col].unique())
    if len(categories) < 2:
        st.warning(
            f"Column '{category_col}' needs at least two distinct values "
            "to compare a category against the rest."
        )
        return

    # Scattertext's `category` argument is a VALUE of the category column
    # (the group plotted against all others), not the column name itself.
    category = st.selectbox("Choose the category to compare against the rest", categories)
    customize_category_name = st.text_input('Enter the category name')
    customize_non_category_name = st.text_input('Enter the non-category name')

    # Parsing every document with spaCy is expensive, so only do it on demand
    # instead of on every widget interaction. All option widgets live OUTSIDE
    # this button so they stay visible across Streamlit reruns.
    if not st.button("Generate plot"):
        return

    corpus = stx.CorpusFromPandas(
        data,
        category_col=category_col,
        text_col=text_col,
        nlp=nlp,
    ).build()

    category_label = customize_category_name or category
    # No `metadata` is passed: it must be a per-document sequence, and the
    # whole DataFrame (as the original passed) is not a valid value.
    html = stx.produce_scattertext_explorer(
        corpus,
        category=category,
        category_name=category_label,
        not_category_name=customize_non_category_name or f"Not {category_label}",
        width_in_pixels=1000,
        minimum_term_frequency=0,
    )
    # The embedded iframe is very short by default, which clips the plot.
    st.components.v1.html(html, height=800, scrolling=True)


def main():
    """Run the Streamlit page: upload a table, pick columns, show the plot."""
    st.title("Scattertext Analysis")
    # TODO: update other web settings

    uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])
    if uploaded_file is None:
        st.write("Please upload a CSV or TXT file to begin.")
        return

    df = _load_table(uploaded_file)
    if df is None:
        return
    columns = list(df.columns)

    # Drive the flow from the selectbox: its value persists across reruns,
    # whereas st.button is only True for the single rerun after the click.
    function_choice = st.selectbox(
        'Choose file source',
        [SOURCE_PLACEHOLDER, SOURCE_CUSTOMIZED, SOURCE_DATABASE],
    )

    if function_choice == SOURCE_CUSTOMIZED:
        # Mode 1: the user names both the text column and the category column.
        text_column = st.selectbox("Choose text column for analysis", columns)
        category_options = [col for col in columns if col != text_column]
        if not category_options:
            st.error("The file needs at least two columns: one for text and one for categories.")
            return
        category_column = st.selectbox("Choose category column", category_options)
        _render_explorer(df, category_column, text_column)

    elif function_choice == SOURCE_DATABASE:
        # Mode 2: analyse the abstract column of a database export.
        if uploaded_file.name.lower().endswith(".txt"):
            abstract_column = WEB_OF_SCIENCE_ABSTRACT_COLUMN
        else:
            abstract_column = SCOPUS_LENS_ABSTRACT_COLUMN
        if abstract_column not in columns:
            st.error(f"Expected an abstract column named '{abstract_column}' in the uploaded file.")
            return
        # The abstract is the text being analysed, so it is not offered as a
        # category column.
        category_options = [col for col in columns if col != abstract_column]
        if not category_options:
            st.error("The file has no column besides the abstract to use as category.")
            return
        chosen_column = st.selectbox(
            "Choose text column for analysis BESIDES ABSTRACT", category_options
        )
        _render_explorer(df, chosen_column, abstract_column)


main()
|