"""Streamlit front end that draws a Scattertext explorer from an uploaded table.

Two file sources can be selected in the UI:

* "Customized": the user picks both the text column and the category column.
* "Download from Online Databases": the abstract column is fixed by file type
  ("Abstract" for .csv exports such as Scopus/Lens, "AB" for tab-separated
  .txt exports such as Web of Science) and the user picks the category column.

In both cases the user then picks which value of the category column is
contrasted against all other values.
"""

from pathlib import Path
from typing import Optional

import en_core_web_md  # noqa: F401  (alternative loader: en_core_web_md.load())
import pandas as pd
import scattertext as stx
import spacy
import streamlit as st

MODE_PLACEHOLDER = "Choose..."
MODE_CUSTOM = "Customized"
MODE_DATABASE = "Download from Online Databases"

# Name of the abstract column for each supported database export format.
ABSTRACT_COLUMN_BY_SUFFIX = {".csv": "Abstract", ".txt": "AB"}

PLOT_WIDTH_PX = 1000
# st.components.v1.html defaults to a 150 px tall frame, which clips the plot.
FRAME_HEIGHT_PX = 800

# load language model
nlp = spacy.load("en_core_web_md")


def _read_upload(uploaded) -> Optional[pd.DataFrame]:
    """Parse the uploaded file into a DataFrame.

    Returns None when the file extension is neither .csv nor .txt.
    .txt files are assumed to be tab-separated.
    """
    suffix = Path(uploaded.name).suffix.lower()
    # The script is re-executed on every widget interaction; rewind so an
    # already-consumed buffer is parsed from the beginning again.
    uploaded.seek(0)
    if suffix == ".csv":
        return pd.read_csv(uploaded)
    if suffix == ".txt":
        return pd.read_table(uploaded, sep="\t")
    return None


def _pick_category_column(df: pd.DataFrame, text_col, label: str, key: str):
    """Show a selectbox over every column except *text_col*; None if there is none."""
    candidates = [col for col in df.columns if col != text_col]
    if not candidates:
        st.warning("The file needs at least one column besides the text column.")
        return None
    return st.selectbox(label, candidates, key=key)


def _render_explorer(df: pd.DataFrame, text_col, category_col, key_prefix: str) -> None:
    """Collect the plot parameters and, on request, render the explorer.

    Rows with a missing or blank text/category are dropped before the corpus
    is built so that the per-document metadata stays aligned with the corpus.
    """
    frame = df[[text_col, category_col]].dropna().copy()
    frame[text_col] = frame[text_col].astype(str)
    frame[category_col] = frame[category_col].astype(str)
    frame = frame[frame[text_col].str.strip() != ""]

    categories = sorted(frame[category_col].unique())
    if len(categories) < 2:
        st.warning(
            "The category column needs at least two distinct values "
            "among rows that have text."
        )
        return

    # `category` must be a value found in the category column, not its name.
    category = st.selectbox(
        "Choose the category value to compare against the rest",
        categories,
        key=f"{key_prefix}_category_value",
    )
    customize_category_name = st.text_input(
        "Enter the category name", key=f"{key_prefix}_category_name"
    )
    customize_non_category_name = st.text_input(
        "Enter the non-category name", key=f"{key_prefix}_non_category_name"
    )

    # The button comes last and only gates the expensive step: a button is
    # True for a single rerun, so no further widgets may depend on it.
    if not st.button("Generate plot", key=f"{key_prefix}_generate"):
        return

    with st.spinner("Building corpus and plot..."):
        # convert to scattertext corpus (column *names*, not column data)
        corpus = stx.CorpusFromPandas(
            frame,
            category_col=category_col,
            text_col=text_col,
            nlp=nlp,
        ).build()
        # create visualization
        html = stx.produce_scattertext_explorer(
            corpus,
            category=category,
            category_name=customize_category_name or category,
            not_category_name=customize_non_category_name or f"Not {category}",
            width_in_pixels=PLOT_WIDTH_PX,
            minimum_term_frequency=0,
            # one label per document, in corpus order
            metadata=[f"Row {label}" for label in frame.index],
        )
    st.components.v1.html(html, height=FRAME_HEIGHT_PX, scrolling=True)


def _run_analysis(uploaded) -> None:
    """Drive the UI for an uploaded file: pick the source, the columns, then plot."""
    # choose function
    function_choice = st.selectbox(
        "Choose file source", [MODE_PLACEHOLDER, MODE_CUSTOM, MODE_DATABASE]
    )

    try:
        df = _read_upload(uploaded)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as err:
        st.error(f"Could not read the uploaded file: {err}")
        return
    if df is None:
        st.error("Unsupported file format.")
        return
    if df.empty:
        st.warning("The uploaded file contains no rows.")
        return

    if function_choice == MODE_CUSTOM:
        # function1: generate plot from customized file
        chosen_column = st.selectbox("Choose text column for analysis", df.columns)
        category_col = _pick_category_column(
            df, chosen_column, "Choose category column", key="custom_category_col"
        )
        if category_col is not None:
            _render_explorer(df, chosen_column, category_col, key_prefix="custom")
    elif function_choice == MODE_DATABASE:
        # function2: generate plot from databases (the abstract is the text)
        abstract_col = ABSTRACT_COLUMN_BY_SUFFIX[Path(uploaded.name).suffix.lower()]
        if abstract_col not in df.columns:
            st.error(f"Expected a column named '{abstract_col}' in this file.")
            return
        # The abstract column is excluded from the options, so it cannot be
        # picked as the category column.
        chosen_column = _pick_category_column(
            df,
            abstract_col,
            "Choose text column for analysis BESIDES ABSTRACT",
            key="database_category_col",
        )
        if chosen_column is not None:
            _render_explorer(df, abstract_col, chosen_column, key_prefix="database")
    else:
        st.info("Select a file source to continue.")


# config
st.title("Scattertext Analysis")
# TODO: update other web settings

# upload file
uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])

# read data
if uploaded_file is None:
    st.write("Please upload a CSV or TXT file to begin.")
else:
    _run_analysis(uploaded_file)