Spaces:

Oliviayc
/

scattertext

Runtime error

App Files Files Community

scattertext / scattertext_draft.py

Oliviayc

Rename scattertext_funtion.py to scattertext_draft.py

32dc69e verified 11 months ago

raw

history blame contribute delete

5.59 kB


	import streamlit as st
	import scattertext as stx
	import spacy
	import pandas as pd
	import en_core_web_md



	# load language model:
	@st.cache_resource
	def load_language_model():
	    """Load the spaCy ``en_core_web_md`` pipeline once per server process.

	    Streamlit re-executes this script on every widget interaction; caching
	    the pipeline as a shared resource avoids re-loading the model each time.
	    """
	    return en_core_web_md.load()


	nlp = load_language_model()

	# Page configuration: set the title displayed at the top of the app.
	st.title("Scattertext Analysis")
	# TODO: update other web settings

	# upload file
	uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])

	# Column that holds the abstract text in each supported database export.
	ABSTRACT_COLUMN_BY_EXTENSION = {
	    ".csv": "Abstract",  # scopus & lens
	    ".txt": "AB",  # web of science
	}


	def _file_extension(upload):
	    """Return the upload's lower-cased extension (e.g. ".csv"), or "" if none."""
	    name = upload.name.lower()
	    dot = name.rfind(".")
	    return name[dot:] if dot != -1 else ""


	def _read_uploaded_table(upload):
	    """Parse the uploaded file into a DataFrame.

	    Returns None (after reporting the problem in the UI where applicable)
	    when the extension is neither .csv nor .txt or the file cannot be parsed.
	    """
	    ext = _file_extension(upload)
	    # The script is re-executed on every interaction; rewind so a buffer that
	    # an earlier read already consumed can be parsed again.
	    upload.seek(0)
	    try:
	        if ext == ".csv":
	            return pd.read_csv(upload)
	        if ext == ".txt":
	            # TODO doc: TXT contents are assumed to be separated by tabs.
	            return pd.read_table(upload, sep="\t")
	    except (pd.errors.EmptyDataError, pd.errors.ParserError) as err:
	        st.error(f"Could not read the uploaded file: {err}")
	        return pd.DataFrame()
	    return None


	def _render_scattertext(df, text_col, category_col):
	    """Ask for the plot options and render the Scattertext explorer.

	    Args:
	        df: the uploaded table.
	        text_col: name of the column holding the documents.
	        category_col: name of the column holding each document's label.
	    """
	    # CorpusFromPandas takes column *names*. Rows without text or label cannot
	    # be parsed or assigned to a category, so drop them before building.
	    docs = df.dropna(subset=[text_col, category_col]).reset_index(drop=True)
	    docs[text_col] = docs[text_col].astype(str)
	    docs[category_col] = docs[category_col].astype(str)

	    categories = sorted(docs[category_col].unique())
	    if len(categories) < 2:
	        st.error("The category column needs at least two distinct values.")
	        return

	    # customize parameters
	    # `category` must be one of the column's values, not the column name.
	    category = st.selectbox("Choose the category to compare against the rest", categories)
	    default_other = f"Not {category}"
	    customize_category_name = st.text_input("Enter the category name", value=category)
	    customize_non_category_name = st.text_input("Enter the non-category name", value=default_other)

	    # Parsing every document with spaCy is slow, so only do it on request.
	    if not st.button("Generate plot"):
	        return

	    with st.spinner("Building corpus and plot..."):
	        # convert to scattertext corpus
	        corpus = stx.CorpusFromPandas(
	            docs,
	            category_col=category_col,
	            text_col=text_col,
	            nlp=nlp,
	        ).build()
	        # generate HTML visualization
	        html = stx.produce_scattertext_explorer(
	            corpus,
	            category=category,
	            category_name=customize_category_name or category,
	            not_category_name=customize_non_category_name or default_other,
	            width_in_pixels=1000,
	            minimum_term_frequency=0,
	            # One entry per document: each document's own label.
	            metadata=docs[category_col].tolist(),
	        )
	    # An explicit height keeps the 1000px-wide plot from being clipped by a
	    # short embedding iframe.
	    st.components.v1.html(html, height=800, scrolling=True)


	def _run_analysis(df, function_choice, ext):
	    """Pick the text and category columns for the chosen mode, then plot."""
	    columns = list(df.columns)
	    if function_choice == 'Customized':
	        # function1: generate plot from customized file
	        text_col = st.selectbox("Choose text column for analysis", columns)
	    else:
	        # function2: generate plot from databases
	        # TODO doc: explain that this mode analyzes the abstract column.
	        text_col = ABSTRACT_COLUMN_BY_EXTENSION[ext]
	        if text_col not in columns:
	            st.error(f"Expected a '{text_col}' column in this export, but it was not found.")
	            return

	    # The text column itself cannot double as the category column.
	    category_options = [col for col in columns if col != text_col]
	    if not category_options:
	        st.error("The file needs at least one more column to use as the category.")
	        return
	    category_col = st.selectbox(
	        "Choose category column (any column besides the text column)",
	        category_options,
	    )
	    _render_scattertext(df, text_col, category_col)


	if uploaded_file is None:
	    st.write("Please upload a CSV or TXT file to begin.")
	else:
	    # choose function
	    # A selectbox keeps its value across reruns, unlike a button, so the
	    # widgets rendered below it stay on screen while the user fills them in.
	    function_choice = st.selectbox(
	        'Choose file source',
	        ['Choose...', 'Customized', 'Download from Online Databases'],
	    )
	    if function_choice != 'Choose...':
	        # read data
	        df = _read_uploaded_table(uploaded_file)
	        if df is None:
	            st.error("Unsupported file format.")
	        elif not df.empty:
	            _run_analysis(df, function_choice, _file_extension(uploaded_file))