Spaces:
Runtime error
Runtime error
import streamlit as st | |
import scattertext as stx | |
import spacy | |
import pandas as pd | |
import en_core_web_md | |
# Load the spaCy language model exactly once. The original loaded the same
# "en_core_web_md" pipeline twice (package-level load() followed by
# spacy.load), with the second assignment simply overwriting the first.
nlp = spacy.load("en_core_web_md")

# Page configuration.
st.title("Scattertext Analysis")
# TODO: update other web settings

# File upload: accepts a CSV file or a TXT file (TXT is parsed downstream as
# tab-separated text).
uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])
# Main flow: read the uploaded table, let the user choose the data source and
# columns, then build a Scattertext corpus and embed the interactive explorer.

# Name of the abstract column for each supported database export, keyed by
# file extension. CSV exports (Scopus / Lens) use "Abstract"; tab-delimited
# TXT exports (Web of Science) use the field tag "AB".
# NOTE(review): column names are taken from the checks in the original
# script -- confirm against real exports.
ABSTRACT_COLUMN_BY_EXTENSION = {".csv": "Abstract", ".txt": "AB"}

SOURCE_PLACEHOLDER = 'Choose...'
SOURCE_CUSTOM = 'Customized'
SOURCE_DATABASE = 'Download from Online Databases'


def abstract_column_for(file_name):
    """Return the abstract column name for a database export file.

    Args:
        file_name: Name of the uploaded file; only its extension is used
            (case-insensitive).

    Returns:
        The column name for the export format, or ``None`` when the
        extension is not a supported export format.
    """
    lowered = file_name.lower()
    for extension, column in ABSTRACT_COLUMN_BY_EXTENSION.items():
        if lowered.endswith(extension):
            return column
    return None


def read_uploaded_table(uploaded_file):
    """Parse an uploaded CSV or tab-separated TXT file into a DataFrame.

    Args:
        uploaded_file: Seekable binary file-like object with a ``name``
            attribute (as returned by ``st.file_uploader``).

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        ValueError: If the extension is neither ``.csv`` nor ``.txt``.
            pandas parsing errors (empty or malformed files) are also
            ``ValueError`` subclasses and propagate unchanged.
    """
    lowered = uploaded_file.name.lower()
    # Rewind first so that a buffer which was already consumed is re-read
    # from the start instead of yielding an empty table.
    uploaded_file.seek(0)
    if lowered.endswith(".csv"):
        return pd.read_csv(uploaded_file)
    if lowered.endswith(".txt"):
        # TXT uploads are assumed to be tab-separated.
        return pd.read_table(uploaded_file, sep="\t")
    raise ValueError("Unsupported file format.")


def prepare_documents(df, category_col, text_col):
    """Return a clean two-column frame ready for corpus construction.

    Rows missing either the category or the text are dropped (the language
    model cannot parse NaN), both columns are cast to ``str`` so category
    values compare reliably with the value picked in the UI, and the index
    is reset so positional metadata lines up with the documents.

    Args:
        df: Source table.
        category_col: Name of the column holding the document category.
        text_col: Name of the column holding the document text.

    Returns:
        A new DataFrame containing only ``category_col`` and ``text_col``.
    """
    subset = df[[category_col, text_col]].dropna()
    return subset.astype(str).reset_index(drop=True)


def render_explorer(df, category_col, text_col, nlp):
    """Collect plot options from the user and render the explorer.

    Args:
        df: Uploaded table.
        category_col: Name of the category column (may be ``None`` when the
            table offers no candidate column).
        text_col: Name of the text column.
        nlp: Loaded spaCy pipeline used to parse the documents.
    """
    # Function-scope import: the explicit submodule import guarantees that
    # the components API is available regardless of what the top-level
    # ``streamlit`` package happens to expose.
    import streamlit.components.v1 as components

    if category_col is None or category_col == text_col:
        st.warning("Please choose a category column that differs from the text column.")
        return

    docs = prepare_documents(df, category_col, text_col)
    categories = sorted(docs[category_col].unique())
    if len(categories) < 2:
        st.warning("The category column needs at least two distinct values to compare.")
        return

    # Scattertext contrasts ONE category value against all the others, so
    # the user must pick a value of the category column (not a column name).
    target = st.selectbox("Choose the category to contrast against the rest", categories)
    category_name = st.text_input('Enter the category name') or target
    not_category_name = st.text_input('Enter the non-category name') or f"Not {target}"

    # Parsing every document is expensive, so it only runs on explicit
    # request. The button gates output only -- never other widgets --
    # because a button is True for a single rerun.
    if not st.button("Generate plot"):
        return

    corpus = stx.CorpusFromPandas(
        docs,
        category_col=category_col,
        text_col=text_col,
        nlp=nlp,
    ).build()
    html = stx.produce_scattertext_explorer(
        corpus,
        category=target,
        category_name=category_name,
        not_category_name=not_category_name,
        width_in_pixels=1000,
        minimum_term_frequency=0,
        # One label per document, aligned with the rows of ``docs``.
        metadata=docs[category_col].tolist(),
    )
    # The default iframe height (150px) is far too small for the explorer.
    components.html(html, height=800, scrolling=True)


def main(uploaded_file, nlp):
    """Run the page flow for the current Streamlit rerun.

    Args:
        uploaded_file: Result of ``st.file_uploader`` (``None`` until the
            user uploads a file).
        nlp: Loaded spaCy pipeline.
    """
    if uploaded_file is None:
        st.write("Please upload a CSV or TXT file to begin.")
        return

    try:
        df = read_uploaded_table(uploaded_file)
    except ValueError as err:
        # Covers unsupported extensions as well as pandas parsing errors.
        st.error(str(err))
        return

    columns = list(df.columns)
    # The selectbox value (not a button) drives the branch so the widgets
    # below stay on screen while the user interacts with them.
    function_choice = st.selectbox(
        'Choose file source',
        [SOURCE_PLACEHOLDER, SOURCE_CUSTOM, SOURCE_DATABASE],
    )

    if function_choice == SOURCE_CUSTOM:
        # Function 1: plot from a user-supplied table.
        text_col = st.selectbox("Choose text column for analysis", columns)
        category_col = st.selectbox(
            "Choose category column",
            [name for name in columns if name != text_col],
        )
        render_explorer(df, category_col, text_col, nlp)
    elif function_choice == SOURCE_DATABASE:
        # Function 2: plot from a database export; the abstract is always
        # the analysed text and the user picks the category column.
        text_col = abstract_column_for(uploaded_file.name)
        if text_col not in columns:
            st.error(f"Expected an abstract column named '{text_col}' in this export.")
            return
        category_col = st.selectbox(
            "Choose text column for analysis BESIDES ABSTRACT",
            [name for name in columns if name != text_col],
        )
        render_explorer(df, category_col, text_col, nlp)


# Streamlit executes the script with __name__ == "__main__", so the guard
# keeps normal behaviour while allowing the helpers to be imported safely.
if __name__ == "__main__":
    main(uploaded_file, nlp)