Spaces:
Runtime error
Runtime error
Upload scattertext_funtion.py
Browse files- scattertext_funtion.py +63 -0
scattertext_funtion.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import streamlit as st
|
3 |
+
import scattertext as stx
|
4 |
+
import spacy
|
5 |
+
import pandas as pd
|
6 |
+
import en_core_web_sm
|
7 |
+
|
8 |
+
|
9 |
+
|
10 |
+
# Load the language model.
# Prefer the medium English model; fall back to the small model (imported at
# the top of the file) when the medium one is not installed, instead of
# loading both and crashing on the missing package.
# NOTE(review): Streamlit re-runs this script on every interaction, so the
# model is reloaded each time -- consider caching it if the installed
# Streamlit version offers a resource cache.
try:
    nlp = spacy.load("en_core_web_md")
except OSError:
    nlp = en_core_web_sm.load()

# config
st.title("Scattertext Analysis")
# TODO: update other web settings

# upload file
uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])

# process data
if uploaded_file is not None:
    # Compare the extension case-insensitively so "DATA.CSV" is accepted too.
    file_name = uploaded_file.name.lower()
    if file_name.endswith(".csv"):
        df = pd.read_csv(uploaded_file)
    elif file_name.endswith(".txt"):
        # TODO: check, assumes contents are separated by tabs.
        df = pd.read_csv(uploaded_file, sep='\t')
    else:
        df = None
        st.error("Unsupported file format.")

    if df is not None:
        # choose function
        function_choice = st.selectbox('Choose function', ('Choose...', 'Generate Scattertext Plot', '...'))
        # TODO: add new functions here (paper sections)

        # function 1: Scattertext plot
        if function_choice == 'Generate Scattertext Plot':
            text_columns = df.select_dtypes(include=['object']).columns.tolist()
            if not text_columns:
                st.error("The uploaded file has no text columns to analyse.")
            else:
                chosen_column = st.selectbox("Choose text column for analysis", text_columns)

                # Scattertext contrasts one category against the rest, so a
                # separate column holding the category labels is required.
                category_columns = [col for col in df.columns if col != chosen_column]
                if not category_columns:
                    st.error("The uploaded file needs a second column holding the categories.")
                else:
                    category_column = st.selectbox("Choose category column", category_columns)

                    # Work on a clean copy: rows without text or category
                    # cannot be parsed, and string labels keep the selected
                    # category comparable with the values stored in the corpus.
                    corpus_df = df[[chosen_column, category_column]].dropna().astype(str)
                    categories = sorted(corpus_df[category_column].unique().tolist())

                    if len(categories) < 2:
                        st.warning("The category column needs at least two distinct values.")
                    else:
                        chosen_category = st.selectbox("Choose category", categories)

                        # customize parameters
                        # These widgets live outside the button branch so their
                        # values survive the rerun triggered by the click;
                        # input() cannot be used because a Streamlit app has
                        # no interactive stdin.
                        customize_category_name = st.text_input(
                            'Enter the category name', chosen_category
                        )
                        customize_non_category_name = st.text_input(
                            'Enter the non-category name', f"Not {chosen_category}"
                        )

                        if st.button('Generate Scattertext Plot'):
                            # convert to scattertext corpus
                            corpus = stx.CorpusFromPandas(
                                corpus_df,
                                category_col=category_column,
                                text_col=chosen_column,
                                nlp=nlp,
                            ).build()

                            # create visualization
                            # No metadata is passed: it must be one entry per
                            # document, not the whole DataFrame.
                            html = stx.produce_scattertext_explorer(
                                corpus,
                                category=chosen_category,
                                category_name=customize_category_name,
                                not_category_name=customize_non_category_name,
                                width_in_pixels=1000,
                                minimum_term_frequency=0,
                            )
                            # The default component height is too small for
                            # the explorer, so give it room and allow scrolling.
                            st.components.v1.html(html, height=700, scrolling=True)
        # TODO: insert new functions
else:
    st.write("Please upload a CSV or TXT file to begin.")
|
62 |
+
|
63 |
+
|