Spaces:
Runtime error
Runtime error
File size: 5,592 Bytes
b6a8e41 c492dcd b6a8e41 c492dcd b6a8e41 8234e30 b6a8e41 c492dcd 2cb8bbb c492dcd 8234e30 c492dcd 8234e30 c492dcd 2cb8bbb b6a8e41 8234e30 b6a8e41 2cb8bbb b6a8e41 8234e30 b6a8e41 b6b72c7 8234e30 2cb8bbb 8234e30 c492dcd 8234e30 b6a8e41 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import streamlit as st
import scattertext as stx
import spacy
import pandas as pd
import en_core_web_md
# Label of the placeholder entry in the "file source" selector.
NO_SOURCE_CHOSEN = 'Choose...'
SOURCE_CUSTOMIZED = 'Customized'
SOURCE_DATABASE = 'Download from Online Databases'


def read_uploaded_table(uploaded_file):
    """Parse an uploaded file into a DataFrame based on its extension.

    ``.csv`` files are read as comma-separated values; ``.txt`` files are
    assumed to be tab-separated.

    Args:
        uploaded_file: a binary file-like object exposing a ``name``
            attribute (what ``st.file_uploader`` returns).

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        ValueError: if the extension is neither ``.csv`` nor ``.txt``
            (pandas parsing errors are ``ValueError`` subclasses as well).
    """
    filename = uploaded_file.name.lower()
    if filename.endswith(".csv"):
        return pd.read_csv(uploaded_file)
    if filename.endswith(".txt"):
        return pd.read_table(uploaded_file, sep='\t')
    raise ValueError("Unsupported file format.")


def abstract_column_for(filename):
    """Return the abstract column name used by database exports.

    CSV exports (Scopus, Lens) are expected to carry the abstract in an
    ``Abstract`` column; tab-separated TXT exports (Web of Science) in ``AB``.
    """
    return 'Abstract' if filename.lower().endswith(".csv") else 'AB'


def prepare_corpus_frame(df, category_col, text_col):
    """Return the rows of *df* that can be turned into corpus documents.

    Rows with a missing category or text, or with whitespace-only text, are
    dropped, and both columns are converted to ``str``. The original row
    labels are kept in the index so callers can still identify the documents.
    """
    frame = df.dropna(subset=[category_col, text_col]).copy()
    frame[category_col] = frame[category_col].astype(str)
    frame[text_col] = frame[text_col].astype(str)
    return frame[frame[text_col].str.strip() != ""]


def resolve_label(user_text, fallback):
    """Return the stripped *user_text*, or *fallback* when it is blank."""
    cleaned = (user_text or "").strip()
    return cleaned if cleaned else fallback


def main():
    """Render the Scattertext Analysis page.

    The user uploads a table, picks the file source, the text and category
    columns and the category to highlight, then presses "Generate plot".
    """
    # config
    st.title("Scattertext Analysis")
    # TODO: update other web settings

    # upload file
    uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])
    if uploaded_file is None:
        st.write("Please upload a CSV or TXT file to begin.")
        return

    # choose function; the selectbox value survives reruns, unlike a button
    function_choice = st.selectbox(
        'Choose file source',
        [NO_SOURCE_CHOSEN, SOURCE_CUSTOMIZED, SOURCE_DATABASE],
    )
    if function_choice == NO_SOURCE_CHOSEN:
        return

    # read data
    try:
        df = read_uploaded_table(uploaded_file)
    except ValueError as err:
        st.error(str(err))
        return

    if function_choice == SOURCE_CUSTOMIZED:
        # function1: the user decides which column holds the text
        text_col = st.selectbox("Choose text column for analysis", list(df.columns))
    else:
        # function2: database exports are always analysed on their abstract
        text_col = abstract_column_for(uploaded_file.name)
        if text_col not in df.columns:
            st.error(f"Expected an abstract column named '{text_col}' in this file.")
            return

    # The text column cannot double as the category column, so leave it out
    # of the choices instead of rejecting it after the fact.
    category_options = [col for col in df.columns if col != text_col]
    if not category_options:
        st.error("The file needs at least one column besides the text column.")
        return
    if function_choice == SOURCE_CUSTOMIZED:
        category_col = st.selectbox("Choose category column", category_options)
    else:
        category_col = st.selectbox(
            "Choose category column for analysis BESIDES ABSTRACT", category_options
        )

    frame = prepare_corpus_frame(df, category_col, text_col)
    categories = sorted(frame[category_col].unique())
    if len(categories) < 2:
        st.error("The category column needs at least two distinct values to compare.")
        return

    # Scattertext contrasts one category value against all the others.
    category = st.selectbox("Choose the category to compare against the rest", categories)

    # customize parameters
    customize_category_name = st.text_input('Enter the category name')
    customize_non_category_name = st.text_input('Enter the non-category name')

    # The button comes last: it is only True on the rerun triggered by the
    # click, so no widget may depend on it, and the expensive parsing below
    # is skipped while the user is still adjusting the settings.
    if not st.button("Generate plot"):
        return

    # load language model:
    nlp = en_core_web_md.load()

    # convert to scattertext corpus (columns are passed by name)
    corpus = stx.CorpusFromPandas(
        frame.reset_index(drop=True),
        category_col=category_col,
        text_col=text_col,
        nlp=nlp,
    ).build()

    # create visualization; each document is labelled with its row label
    # from the uploaded table
    html = stx.produce_scattertext_explorer(
        corpus,
        category=category,
        category_name=resolve_label(customize_category_name, category),
        not_category_name=resolve_label(customize_non_category_name, f"Not {category}"),
        width_in_pixels=1000,
        minimum_term_frequency=0,
        metadata=frame.index.astype(str).tolist(),
    )
    # An explicit height is needed; the default iframe is too short for the plot.
    st.components.v1.html(html, height=800, scrolling=True)


# Streamlit executes the script as ``__main__`` on every rerun.
if __name__ == "__main__":
    main()
|