Oliviayc committed on
Commit
c492dcd
·
1 Parent(s): b6a8e41

Modify the overall framework and improve the first function

Browse files
Files changed (2) hide show
  1. requirements.txt +74 -0
  2. scattertext_funtion.py +18 -16
requirements.txt ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ annotated-types==0.6.0
2
+ archspec @ file:///croot/archspec_1697725767277/work
3
+ blis==0.7.11
4
+ boltons @ file:///Users/cbousseau/work/recipes/ci_py311/boltons_1677965141748/work
5
+ Brotli @ file:///Users/cbousseau/work/recipes/ci_py311/brotli-split_1677936346777/work
6
+ catalogue==2.0.10
7
+ certifi @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_35eq66o3mo/croot/certifi_1700501684871/work/certifi
8
+ cffi @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_b4nang6w_y/croot/cffi_1700254307954/work
9
+ charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work
10
+ click==8.1.7
11
+ cloudpathlib==0.16.0
12
+ conda @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_a2jidg4a7u/croot/conda_1706737968110/work
13
+ conda-content-trust @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_5324skqvu9/croot/conda-content-trust_1693490622873/work
14
+ conda-libmamba-solver @ file:///croot/conda-libmamba-solver_1702997573971/work/src
15
+ conda-package-handling @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_fc4cx8vjhj/croot/conda-package-handling_1690999937094/work
16
+ conda_package_streaming @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_aecpaup22q/croot/conda-package-streaming_1690987978274/work
17
+ confection==0.1.4
18
+ cryptography @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_b1p0q5vizk/croot/cryptography_1702070293829/work
19
+ cymem==2.0.8
20
+ distro @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_10ye1wu827/croot/distro_1701455019085/work
21
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl#sha256=86cc141f63942d4b2c5fcee06630fd6f904788d2f0ab005cce45aadb8fb73889
22
+ flashtext==2.7
23
+ gensim==4.3.2
24
+ geographiclib @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_bb1rwz0oji/croot/geographiclib_1683807319323/work
25
+ idna @ file:///Users/cbousseau/work/recipes/ci_py311/idna_1677906072337/work
26
+ Jinja2==3.1.3
27
+ joblib==1.3.2
28
+ jsonpatch @ file:///tmp/build/80754af9/jsonpatch_1615747632069/work
29
+ jsonpointer==2.1
30
+ langcodes==3.3.0
31
+ libmambapy @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_59l2npsw_8/croot/mamba-split_1698782625405/work/libmambapy
32
+ MarkupSafe==2.1.5
33
+ menuinst @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_a2fxmxkg_z/croot/menuinst_1702390297906/work
34
+ murmurhash==1.0.10
35
+ numpy==1.26.4
36
+ packaging @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_6dm6d4jd_t/croot/packaging_1693575176524/work
37
+ pandas==2.2.0
38
+ patsy==0.5.6
39
+ platformdirs @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_a8u4fy8k9o/croot/platformdirs_1692205661656/work
40
+ pluggy @ file:///Users/cbousseau/work/recipes/ci_py311/pluggy_1677906980825/work
41
+ preshed==3.0.9
42
+ pycosat @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_3eg8vdcs6z/croot/pycosat_1696536519213/work
43
+ pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work
44
+ pydantic==2.6.1
45
+ pydantic_core==2.16.2
46
+ pyOpenSSL @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_b8whqav6qm/croot/pyopenssl_1690223428943/work
47
+ PySocks @ file:///Users/cbousseau/work/recipes/ci_py311/pysocks_1677906386870/work
48
+ python-dateutil==2.8.2
49
+ pytz==2024.1
50
+ requests @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_54zi68h2nb/croot/requests_1690400233316/work
51
+ ruamel.yaml @ file:///Users/cbousseau/work/recipes/ci_py311/ruamel.yaml_1677934845850/work
52
+ scattertext==0.2.0
53
+ scikit-learn==1.4.1.post1
54
+ scipy==1.12.0
55
+ six==1.16.0
56
+ smart-open==6.4.0
57
+ spacy==3.7.4
58
+ spacy-legacy==3.0.12
59
+ spacy-loggers==1.0.5
60
+ srsly==2.4.8
61
+ statsmodels==0.14.1
62
+ streamlit
63
+ thinc==8.2.3
64
+ threadpoolctl==3.3.0
65
+ tqdm @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_ac7zic_tin/croot/tqdm_1679561870178/work
66
+ truststore @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_42mm7e6j06/croot/truststore_1695244298716/work
67
+ typer==0.9.0
68
+ typing_extensions==4.9.0
69
+ tzdata==2024.1
70
+ urllib3 @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_068obtb882/croot/urllib3_1698257558009/work
71
+ wasabi==1.1.2
72
+ weasel==0.3.4
73
+ zstandard @ file:///Users/cbousseau/work/recipes/ci_py311_2/zstandard_1678996192313/work
74
+ https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1.tar.gz
scattertext_funtion.py CHANGED
@@ -3,12 +3,12 @@ import streamlit as st
3
  import scattertext as stx
4
  import spacy
5
  import pandas as pd
6
- import en_core_web_sm
7
 
8
 
9
 
10
  # load language model:
11
- nlp = en_core_web_sm.load()
12
  nlp = spacy.load("en_core_web_md")
13
 
14
  # config
@@ -16,22 +16,23 @@ st.title("Scattertext Analysis")
16
  # TODO:update other web settings
17
 
18
  # upload file
 
 
 
19
  uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])
20
  # proceed data
21
  if uploaded_file is not None:
22
- if uploaded_file.name.endswith(".csv"):
23
- df = pd.read_csv(uploaded_file)
24
- elif uploaded_file.name.endswith(".txt"):
25
- df = pd.read_csv(uploaded_file, sep='\t')
26
- # TODO: check, assume contents are separated by tabs.
27
- # choose function
28
- function_choice = st.selectbox('Choose function', ('Choose...', 'Generate Scattertext Plot', '...'))
29
- # TODO: add new functions here (paper sections)
30
- # define function1
31
- if function_choice == 'Generate Scattertext Plot':
32
- text_columns = df.select_dtypes(include=['object']).columns.tolist()
33
- chosen_column = st.selectbox("Choose text column for analysis", text_columns)
34
- if st.button('Generate Scattertext Plot'):
35
  # convert to scattertext corpus
36
  corpus = stx.CorpusFromPandas(
37
  text_columns,
@@ -53,7 +54,8 @@ if uploaded_file is not None:
53
  minimum_term_frequency=0,
54
  metadata=df)
55
  st.components.v1.html(html)
56
- # TODO: insert new functions
 
57
 
58
  else:
59
  st.error("Unsupported file format.")
 
3
  import scattertext as stx
4
  import spacy
5
  import pandas as pd
6
+ import en_core_web_md
7
 
8
 
9
 
10
  # load language model:
11
+ nlp = en_core_web_md.load()
12
  nlp = spacy.load("en_core_web_md")
13
 
14
  # config
 
16
  # TODO:update other web settings
17
 
18
  # upload file
19
+
20
+
21
+ # read data
22
  uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])
23
  # proceed data
24
  if uploaded_file is not None:
25
+ # choose function
26
+ function_choice = st.selectbox('Choose file source', ('Choose...', 'Customized', 'Download from Online Databases'))
27
+ # function1: generate plot from customized file
28
+ if st.button('Customized'):
29
+ if uploaded_file.name.endswith(".csv"):
30
+ df = pd.read_csv(uploaded_file)
31
+ elif uploaded_file.name.endswith(".txt"):
32
+ df = pd.read_csv(uploaded_file, sep='\t') # TODO: doc: assume contents are separated by tabs.
33
+
34
+ text_columns = df.select_dtypes(include=['object']).columns.tolist()
35
+ chosen_column = st.selectbox("Choose text column for analysis", text_columns)
 
 
36
  # convert to scattertext corpus
37
  corpus = stx.CorpusFromPandas(
38
  text_columns,
 
54
  minimum_term_frequency=0,
55
  metadata=df)
56
  st.components.v1.html(html)
57
+
58
+
59
 
60
  else:
61
  st.error("Unsupported file format.")