Spaces:

Oliviayc
/

scattertext

Runtime error

App Files Files Community

Oliviayc committed on Mar 22, 2024

Commit

95f5cd3

verified ·

1 Parent(s): 2cb8bbb

Create app.py

Browse files

Files changed (1) hide show

app.py +120 -0

app.py ADDED Viewed

	@@ -0,0 +1,120 @@

+import streamlit as st
+import scattertext as stx
+import spacy
+import pandas as pd
+import en_core_web_md
+# load language model:
+nlp = en_core_web_md.load()
+nlp = spacy.load("en_core_web_md")
+# config
+st.title("Scattertext Analysis")
+# TODO：update other web settings
+# upload file
+uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])
+# read data
+if uploaded_file is not None:
+    # choose function
+    function_choice = st.selectbox('Choose file source', ['Choose...', 'Customized', 'Download from Online Databases'])
+    # function1: generate plot from customized file
+    if st.button('Customized'):
+        # proceed data
+        if uploaded_file.name.endswith(".csv"):
+            df = pd.read_csv(uploaded_file)
+        elif uploaded_file.name.endswith(".txt"):
+            df = pd.read_table(uploaded_file, sep='\t')  # TODO : doc: assume contents are seperated by Tabs.
+            chosen_column = st.selectbox("Choose text column for analysis", df.columns)
+            # convert to scattertext corpus
+            corpus = stx.CorpusFromPandas(
+                df,
+                category_col=df.head(),
+                text_col=df[chosen_column],
+                nlp=nlp,
+                ). build()
+            # create visualization
+            # customize parameters
+            customize_category_name = st.text_input('Enter the category name')
+            customize_non_category_name = st.text_input('Enter the non-category name')
+            html = stx.produce_scattertext_explorer(corpus,
+                                       category=chosen_column,
+                                       category_name=customize_category_name,
+                                       not_category_name=customize_non_category_name,
+                                       width_in_pixels=1000,
+                                       minimum_term_frequency=0,
+                                       metadata=df)
+            st.components.v1.html(html)
+        else:
+            st.error("Unsupported file format.")
+    # function2: generate plot from databases
+    elif st.button('Download from Online Databases'):  # TODO doc: Explain: analyze abstract.
+        # scopus & lens
+        if uploaded_file.name.endswith(".csv"):
+            df = pd.read_csv(uploaded_file)
+            chosen_column = st.selectbox("Choose text column for analysis BESIDES ABSTRACT", df.columns)
+            if chosen_column == 'Abstract':
+                st.write("This column cannot be selected, please select again")
+            else:
+                # make plot
+                corpus = stx.CorpusFromPandas(
+                    df,
+                    category_col=df[chosen_column],
+                    text_col='Abstract',
+                    nlp=nlp,
+                ).build()
+                # generate HTML visualization
+                input_category_name = input('Enter the category name')
+                customize_category_name = st.text_input('Customize parameter', input_category_name)
+                input_non_category_name = input('Enter the non-category name')
+                customize_non_category_name = st.text_input('Customize parameter', input_non_category_name)
+                html = stx.produce_scattertext_explorer(corpus,
+                                                        category=chosen_column,
+                                                        category_name=customize_category_name,
+                                                        not_category_name=customize_non_category_name,
+                                                        width_in_pixels=1000,
+                                                        minimum_term_frequency=0,
+                                                        metadata=df)
+                st.components.v1.html(html)
+        # web of science
+        elif uploaded_file.name.endswith(".txt"):
+            df = pd.read_table(uploaded_file, sep='\t')
+            chosen_column = st.selectbox("Choose text column for analysis BESIDES ABSTRACT", df.head())
+            if chosen_column == 'AB':
+                st.write("This column cannot be selected, please select again")
+            else:
+                # make plot
+                corpus = stx.CorpusFromPandas(
+                    df,
+                    category_col=df[chosen_column],
+                    text_col='Abstract',
+                    nlp=nlp,
+                ).build()
+                # generate HTML visualization
+                input_category_name = input('Enter the category name')
+                customize_category_name = st.text_input('Customize parameter', input_category_name)
+                input_non_category_name = input('Enter the non-category name')
+                customize_non_category_name = st.text_input('Customize parameter', input_non_category_name)
+                html = stx.produce_scattertext_explorer(corpus,
+                                                        category=chosen_column,
+                                                        category_name=customize_category_name,
+                                                        not_category_name=customize_non_category_name,
+                                                        width_in_pixels=1000,
+                                                        minimum_term_frequency=0,
+                                                        metadata=df)
+                st.components.v1.html(html)
+        else:
+            st.error("Unsupported file format.")
+else:
+    st.write("Please upload a CSV or TXT file to begin.")