Oliviayc committed on
Commit
b6a8e41
·
verified ·
1 Parent(s): ca6b49b

Upload scattertext_funtion.py

Browse files
Files changed (1) hide show
  1. scattertext_funtion.py +63 -0
scattertext_funtion.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit page that builds an interactive Scattertext plot from an uploaded table.

The user uploads a CSV (comma-separated) or TXT (tab-separated) file, picks the
column holding the documents and the column holding the category labels, and
the page renders the Scattertext explorer comparing one category against the
rest.
"""

import en_core_web_sm
import pandas as pd
import scattertext as stx
import spacy
import streamlit as st
import streamlit.components.v1 as components

# Label shared by the function selector and the action button.
PLOT_FUNCTION = 'Generate Scattertext Plot'
PLOT_WIDTH_PX = 1000
# components.html() defaults to a very small iframe, which would clip the plot.
PLOT_HEIGHT_PX = 800


def load_language_model():
    """Return the spaCy pipeline used to tokenize the documents.

    The medium English model is preferred; the small model (already imported
    as a package) is used when the medium one is not installed, instead of
    loading both and throwing the first away.
    """
    try:
        return spacy.load("en_core_web_md")
    except OSError:
        # spacy.load raises OSError when the named model cannot be found.
        return en_core_web_sm.load()


def read_uploaded_table(uploaded_file):
    """Parse the uploaded file into a DataFrame.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``.

    Returns:
        The parsed DataFrame, or ``None`` when the file extension is neither
        ``.csv`` nor ``.txt``.
    """
    file_name = uploaded_file.name.lower()
    if file_name.endswith(".csv"):
        return pd.read_csv(uploaded_file)
    if file_name.endswith(".txt"):
        # TODO: check; assumes contents are separated by tabs.
        return pd.read_csv(uploaded_file, sep='\t')
    return None


def render_scattertext_plot(df, nlp):
    """Show the plot controls and, on request, the Scattertext explorer.

    All widgets are created before the button so that their values survive the
    rerun Streamlit performs on every interaction.

    Args:
        df: The uploaded table.
        nlp: The spaCy pipeline used to parse each document.
    """
    text_columns = df.select_dtypes(include=['object']).columns.tolist()
    if not text_columns:
        st.warning("The uploaded file has no text columns to analyse.")
        return
    chosen_column = st.selectbox("Choose text column for analysis", text_columns)

    category_columns = [col for col in df.columns if col != chosen_column]
    if not category_columns:
        st.warning("A second column holding the category labels is required.")
        return
    category_column = st.selectbox("Choose category column", category_columns)

    # Scattertext needs a text and a label for every document, both as strings.
    docs = df.dropna(subset=[chosen_column, category_column]).copy()
    docs[chosen_column] = docs[chosen_column].astype(str)
    docs[category_column] = docs[category_column].astype(str)

    categories = sorted(docs[category_column].unique())
    if len(categories) < 2:
        st.warning("The category column needs at least two distinct values.")
        return
    category = st.selectbox("Choose category to compare against the rest", categories)

    # customize parameters
    category_name = st.text_input('Enter the category name', category)
    not_category_name = st.text_input('Enter the non-category name', f'Not {category}')
    metadata_column = st.selectbox(
        "Choose metadata column (optional)",
        [None] + list(df.columns),
        format_func=lambda col: "(none)" if col is None else str(col),
    )
    # One metadata string per document, aligned with the rows fed to the corpus.
    metadata = None
    if metadata_column is not None:
        metadata = docs[metadata_column].astype(str).tolist()

    if st.button(PLOT_FUNCTION):
        with st.spinner("Building Scattertext plot..."):
            # convert to scattertext corpus
            corpus = stx.CorpusFromPandas(
                docs,
                category_col=category_column,
                text_col=chosen_column,
                nlp=nlp,
            ).build()
            # create visualization
            html = stx.produce_scattertext_explorer(
                corpus,
                category=category,
                category_name=category_name,
                not_category_name=not_category_name,
                width_in_pixels=PLOT_WIDTH_PX,
                minimum_term_frequency=0,
                metadata=metadata,
            )
        components.html(html, height=PLOT_HEIGHT_PX, scrolling=True)


def main():
    """Lay out the page: title, file upload, function choice, and the plot."""
    # config
    st.title("Scattertext Analysis")
    # TODO: update other web settings

    # upload file
    uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])
    if uploaded_file is None:
        st.write("Please upload a CSV or TXT file to begin.")
        return

    # process data
    try:
        df = read_uploaded_table(uploaded_file)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file: {exc}")
        return
    if df is None:
        st.error("Unsupported file format.")
        return

    # choose function
    function_choice = st.selectbox('Choose function', ('Choose...', 'Generate Scattertext Plot', '...'))
    # TODO: add new functions here (paper sections)
    if function_choice == PLOT_FUNCTION:
        render_scattertext_plot(df, nlp)
    # TODO: insert new functions


# load language model:
nlp = load_language_model()

# The page is rendered as a module-level side effect, as in the original
# script, so it works under `streamlit run` and when imported by another page.
main()