File size: 5,592 Bytes
b6a8e41
 
 
 
 
c492dcd
b6a8e41
 
 
 
c492dcd
b6a8e41
 
 
 
 
 
 
 
8234e30
b6a8e41
c492dcd
2cb8bbb
 
c492dcd
 
8234e30
c492dcd
 
 
8234e30
c492dcd
2cb8bbb
b6a8e41
 
8234e30
 
 
b6a8e41
 
 
 
2cb8bbb
 
b6a8e41
8234e30
b6a8e41
 
 
 
 
b6b72c7
 
 
 
 
8234e30
 
 
 
2cb8bbb
8234e30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c492dcd
 
8234e30
 
b6a8e41
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124

import streamlit as st
import scattertext as stx
import spacy
import pandas as pd
import en_core_web_md



# load language model:
# Load the spaCy pipeline exactly once. Loading it a second time under the
# same name only repeats the expensive model load and throws the first
# instance away.
nlp = spacy.load("en_core_web_md")

# config
st.title("Scattertext Analysis")
# TODO:update other web settings

# Column holding the abstract text in each supported database export, keyed by
# file extension: ".csv" is handled as a Scopus/Lens export and ".txt" as a
# Web of Science export.
_ABSTRACT_COLUMNS = {".csv": "Abstract", ".txt": "AB"}

# Options of the "file source" selector; the first entry is a placeholder.
_FILE_SOURCES = ['Choose...', 'Customized', 'Download from Online Databases']


def _read_upload(uploaded):
    """Read the uploaded file into a DataFrame.

    Returns a ``(DataFrame, extension)`` pair where ``extension`` is ".csv" or
    ".txt". If the file type is unsupported or the file cannot be parsed, an
    error is shown in the page and ``(None, None)`` is returned.
    """
    file_name = uploaded.name.lower()
    if file_name.endswith(".csv"):
        extension = ".csv"
    elif file_name.endswith(".txt"):
        extension = ".txt"
    else:
        st.error("Unsupported file format.")
        return None, None

    # The script is re-executed on every widget interaction, so make sure the
    # buffer is read from the start each time.
    uploaded.seek(0)
    try:
        if extension == ".csv":
            frame = pd.read_csv(uploaded)
        else:
            # TODO : doc: assume contents are separated by tabs.
            frame = pd.read_table(uploaded, sep='\t')
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file: {exc}")
        return None, None
    return frame, extension


def _show_explorer(df, category_col, text_col):
    """Collect the plot parameters and render the scattertext explorer.

    ``category_col`` and ``text_col`` are column *names* in ``df``. The user
    picks which value of ``category_col`` is contrasted against all others.
    """
    # Rows without a category or a text cannot be parsed, so drop them and
    # make both columns plain strings before handing them to spaCy.
    docs = df.dropna(subset=[category_col, text_col]).reset_index(drop=True)
    docs[category_col] = docs[category_col].astype(str)
    docs[text_col] = docs[text_col].astype(str)

    categories = sorted(docs[category_col].unique())
    if len(categories) < 2:
        st.error("The category column needs at least two different values to compare.")
        return

    # `category` must be one of the values found in the category column, not
    # the name of the column itself.
    target = st.selectbox("Choose the category to compare against all others", categories)
    # Blank inputs fall back to labels derived from the chosen category.
    category_label = st.text_input('Enter the category name').strip() or target
    other_label = st.text_input('Enter the non-category name').strip() or f"Not {category_label}"

    # Parsing every document is slow and the script reruns on each widget
    # change, so only build the corpus when explicitly requested.
    if not st.button("Generate plot"):
        return

    corpus = stx.CorpusFromPandas(
        docs,
        category_col=category_col,
        text_col=text_col,
        nlp=nlp,
    ).build()
    # `metadata` is omitted: it expects one label per document (e.g. a single
    # column), not a whole DataFrame. TODO: let the user pick a metadata column.
    html = stx.produce_scattertext_explorer(
        corpus,
        category=target,
        category_name=category_label,
        not_category_name=other_label,
        width_in_pixels=1000,
        minimum_term_frequency=0,
    )
    # Give the component an explicit height; the default is far too small for
    # the plot.
    st.components.v1.html(html, height=800, scrolling=True)


def _run_customized(df):
    """function1: generate the plot from a user-supplied table."""
    text_col = st.selectbox("Choose text column for analysis", df.columns)
    category_col = st.selectbox("Choose category column for analysis", df.columns)
    if text_col == category_col:
        st.write("The text column and the category column must be different, please select again")
        return
    _show_explorer(df, category_col, text_col)


def _run_database(df, extension):
    """function2: generate the plot from a database export, analysing the abstracts."""
    abstract_col = _ABSTRACT_COLUMNS[extension]
    if abstract_col not in df.columns:
        st.error(f"The uploaded file has no '{abstract_col}' column.")
        return
    # The abstract is the text being analysed, so it cannot also be the category.
    candidates = [column for column in df.columns if column != abstract_col]
    if not candidates:
        st.error("The uploaded file has no column besides the abstract to use as category.")
        return
    category_col = st.selectbox("Choose category column for analysis BESIDES ABSTRACT", candidates)
    _show_explorer(df, category_col, abstract_col)


# upload file
uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])
# read data
if uploaded_file is not None:
    # choose function; a selectbox keeps its value across reruns, unlike a
    # button, which is only True for the single rerun caused by the click.
    function_choice = st.selectbox('Choose file source', _FILE_SOURCES)
    if function_choice != 'Choose...':
        df, extension = _read_upload(uploaded_file)
        if df is not None:
            if df.empty:
                st.error("The uploaded file contains no rows.")
            elif function_choice == 'Customized':
                _run_customized(df)
            else:
                _run_database(df, extension)
else:
    st.write("Please upload a CSV or TXT file to begin.")