Spaces:

Oliviayc
/

scattertext

Runtime error

App Files Files Community

Oliviayc committed on Apr 24, 2024

Commit

ab928f5

1 Parent(s): 50c295e

make sure the options are author and title columns, modularized comparison 1 and 2

Browse files

Files changed (1) hide show

app.py +108 -105

app.py CHANGED Viewed

@@ -12,115 +12,118 @@ nlp = spacy.load("en_core_web_md")
 # Scopus file loading
 st.title("Scattertext Analysis")
 st.header("Put your file here... ")
-uploaded_file = st.file_uploader("Choose a file", type=["csv", "txt"])
-if uploaded_file is not None:
-    # determine file type
-    if uploaded_file.name.endswith(".csv"):
-        df = pd.read_csv(uploaded_file)
-        # preview the uploaded file
-    elif uploaded_file.name.endswith(".txt"):
-        df = pd.read_table(uploaded_file, sep='\t')  # Doc: assume contents are seperated by Tabs.
-        # preview the uploaded file
-    else:
-        st.error("Unsupported file format.")
-    # layout row1
-    row1_col1, row1_col2 = st.columns(2)
-    choose_column = ('Abstract', 'Source Title')
-    with row1_col1:
-        choice = st.selectbox("Choose column to analyze", choose_column)
-    comparison_options = ('Sources', 'Years')
-    with row1_col2:
-        type_of_comparison = st.selectbox("Type of comparison", comparison_options)
-    if choose_column == 'Abstract':
-        # type_of_comparison 1
-        if type_of_comparison == "Sources":
-            row2_col1, row2_col2 = st.columns(2)
-            with row2_col1:
-                first_source = st.selectbox("Choose First Source", df['Source title'].unique(), key='first_source_select')
-            with row2_col2:
-                second_source = st.selectbox("Choose Second Source", df['Source title'].unique(),
-                                             key='second_source_select')
-            # filter data
-            first_data = df[df['Source title'] == first_source].copy()
-            second_data = df[df['Source title'] == second_source].copy()
-            filtered_data = pd.concat([first_data, second_data])
-            st.write(filtered_data)
-            if st.button("Generate the Scattertext Plot"):
-                # make plot
-                corpus = sct.CorpusFromPandas(
-                    filtered_data,
-                    category_col="Source title",
-                    text_col='Abstract',
-                    nlp=nlp,
-                ).build()
-                # generate HTML visualization
-                html = sct.produce_scattertext_explorer(corpus,
-                                                        category=first_source,
-                                                        category_name=first_source,
-                                                        not_category_name=second_source,
-                                                        width_in_pixels=900,
-                                                        minimum_term_frequency=0,
-                                                        metadata=filtered_data)
-                st.components.v1.html(html, width=1000, height=600)
         # type_of_comparison 2
-        if type_of_comparison == "Years":
-            df['Year'] = pd.to_numeric(df['Year'], errors='coerce')
-            df.dropna(subset=['Year'], inplace=True)
-            df['Year'] = df['Year'].astype(int)
-            min_year = int(df['Year'].min())
-            max_year = int(df['Year'].max())
-            # layout row2
-            row2_col1, row2_col2 = st.columns(2)
-            with row2_col1:
-                first_range = st.slider("First range", min_value = min_year, max_value= max_year, step = 1, value= (min_year, max_year))
-            with row2_col2:
-                second_range = st.slider("Second range", min_value = min_year, max_value= max_year, step = 1, value= (min_year, max_year))
-            # filter data
-            first_range_filter_df = df[(df['Year'] >= first_range[0]) & (df['Year'] <= first_range[1])].copy()
-            first_range_filter_df['Topic Range'] = 'First range'
-            second_range_filter_df = df[(df['Year'] >= second_range[0]) & (df['Year'] <= second_range[1])].copy()
-            second_range_filter_df['Topic Range'] = 'Second range'
-            filtered_df = pd.concat([first_range_filter_df, second_range_filter_df])
-            st.write(filtered_df)
-            if st.button("Generate the Scattertext Plot"):
-                # make plot
-                corpus = sct.CorpusFromPandas(
-                    filtered_df,
-                    category_col="Topic Range",
-                    text_col='Abstract',
-                    nlp=nlp,
-                ).build()
-                # generate HTML visualization
-                html = sct.produce_scattertext_explorer(corpus,
-                                                        category='First range',
-                                                        category_name='First range',
-                                                        not_category_name='Second range',
-                                                        width_in_pixels=900,
-                                                        minimum_term_frequency=0,
-                                                        metadata=filtered_df)
-                st.components.v1.html(html, width=1000, height=600)

 # Scopus file loading
 st.title("Scattertext Analysis")
 st.header("Put your file here... ")
+def compatison1(selected_column):
+    """Render the source-vs-source comparison and, on request, its Scattertext plot.
+
+    Reads the module-level ``df`` (the uploaded table) and ``nlp`` (the spaCy
+    pipeline). Two select boxes pick values of the 'Source title' column; the
+    rows of both picks are displayed and, once the button is pressed, turned
+    into a Scattertext explorer embedded in the page.
+
+    Args:
+        selected_column: Name of the ``df`` column holding the text to analyze.
+    """
+    # type_of_comparison 1
+    source_titles = df['Source title'].unique()
+    row2_col1, row2_col2 = st.columns(2)
+    with row2_col1:
+        first_source = st.selectbox("Choose First Source", source_titles, key='first_source_select')
+    with row2_col2:
+        second_source = st.selectbox("Choose Second Source", source_titles,
+                                     key='second_source_select')
+    # Both select boxes start on the same entry; comparing a source with itself
+    # would duplicate its rows and leave only one category to contrast.
+    if first_source == second_source:
+        st.warning("Choose two different sources to compare.")
+        return
+    # filter data
+    first_data = df[df['Source title'] == first_source].copy()
+    second_data = df[df['Source title'] == second_source].copy()
+    filtered_data = pd.concat([first_data, second_data])
+    st.write(filtered_data)
+    if st.button("Generate the Scattertext Plot"):
+        # Rows with missing text are skipped: a NaN cell is a float, not a
+        # string the spaCy pipeline can parse.
+        plot_data = filtered_data.dropna(subset=[selected_column])
+        # make plot
+        corpus = sct.CorpusFromPandas(
+            plot_data,
+            category_col="Source title",
+            text_col=selected_column,
+            nlp=nlp,
+        ).build()
+        # generate HTML visualization
+        html = sct.produce_scattertext_explorer(corpus,
+                                                category=first_source,
+                                                category_name=first_source,
+                                                not_category_name=second_source,
+                                                width_in_pixels=900,
+                                                minimum_term_frequency=0,
+                                                metadata=plot_data)
+        st.components.v1.html(html, width=1000, height=600)
         # type_of_comparison 2
+def comparison2(selected_column):
+    """Render the year-range comparison and, on request, its Scattertext plot.
+
+    Reads the module-level ``df`` (the uploaded table) and ``nlp`` (the spaCy
+    pipeline). ``df['Year']`` is cleaned in place: values that are not numeric
+    are coerced to NaN, those rows are dropped, and the column is cast to int.
+    Two range sliders then select the rows labelled 'First range' and
+    'Second range' in a new 'Topic Range' column, which is the category that
+    the plot contrasts.
+
+    Args:
+        selected_column: Name of the ``df`` column holding the text to analyze.
+    """
+    df['Year'] = pd.to_numeric(df['Year'], errors='coerce')
+    df.dropna(subset=['Year'], inplace=True)
+    if df.empty:
+        # min()/max() of an empty column are NaN, which int() rejects.
+        st.error("No rows with a numeric 'Year' value were found.")
+        return
+    df['Year'] = df['Year'].astype(int)
+    min_year = int(df['Year'].min())
+    max_year = int(df['Year'].max())
+    if min_year == max_year:
+        # With a single distinct year both ranges are necessarily identical,
+        # so there is nothing to compare.
+        st.warning(f"All rows share the year {min_year}; two year ranges cannot be compared.")
+        return
+    # layout row2
+    row2_col1, row2_col2 = st.columns(2)
+    with row2_col1:
+        first_range = st.slider("First range", min_value=min_year, max_value=max_year, step=1,
+                                value=(min_year, max_year))
+    with row2_col2:
+        second_range = st.slider("Second range", min_value=min_year, max_value=max_year, step=1,
+                                 value=(min_year, max_year))
+    # filter data (both bounds inclusive); a row inside both ranges appears twice,
+    # once under each label
+    first_range_filter_df = df[(df['Year'] >= first_range[0]) & (df['Year'] <= first_range[1])].copy()
+    first_range_filter_df['Topic Range'] = 'First range'
+    second_range_filter_df = df[(df['Year'] >= second_range[0]) & (df['Year'] <= second_range[1])].copy()
+    second_range_filter_df['Topic Range'] = 'Second range'
+    filtered_df = pd.concat([first_range_filter_df, second_range_filter_df])
+    st.write(filtered_df)
+    if st.button("Generate the Scattertext Plot"):
+        # make plot
+        corpus = sct.CorpusFromPandas(
+            filtered_df,
+            category_col="Topic Range",
+            text_col=selected_column,
+            nlp=nlp,
+        ).build()
+        # generate HTML visualization
+        html = sct.produce_scattertext_explorer(corpus,
+                                                category='First range',
+                                                category_name='First range',
+                                                not_category_name='Second range',
+                                                width_in_pixels=900,
+                                                minimum_term_frequency=0,
+                                                metadata=filtered_df)
+        st.components.v1.html(html, width=1000, height=600)
+if __name__ == '__main__':
+    # Script entry point: upload a table, pick the text column and the kind of
+    # comparison, then hand over to the matching comparison helper.
+    uploaded_file = st.file_uploader("Choose a file", type=["csv", "txt"])
+    if uploaded_file is not None:
+        # determine file type; lower-casing lets ".CSV" / ".TXT" match as well
+        file_name = uploaded_file.name.lower()
+        # NOTE(review): the column names differ per format -- presumably the CSV
+        # is a Scopus export and the TXT a tab-delimited export using the
+        # 'AB'/'TI' field tags; confirm against real input files.
+        if file_name.endswith(".csv"):
+            df = pd.read_csv(uploaded_file)
+            abstract_col = 'Abstract'
+            title_col = 'Title'
+        elif file_name.endswith(".txt"):
+            df = pd.read_table(uploaded_file, sep='\t')  # Doc: assume contents are separated by tabs.
+            abstract_col = 'AB'
+            title_col = 'TI'
+        else:
+            st.error("Unsupported file format.")
+            # Halt this run: df and the column names were never set, so
+            # continuing would raise a NameError below.
+            st.stop()
+        column_choices = (abstract_col, title_col)
+        # layout row1
+        row1_col1, row1_col2 = st.columns(2)
+        with row1_col1:
+            choice = st.selectbox("Choose column to analyze", column_choices)
+        comparison_options = ('Sources', 'Years')
+        with row1_col2:
+            type_of_comparison = st.selectbox("Type of comparison", comparison_options)
+        # Called outside the column context so the comparison UI spans the full
+        # page width, and with the selected column rather than the whole tuple
+        # of options.
+        if type_of_comparison == 'Sources':
+            compatison1(choice)
+        elif type_of_comparison == 'Years':
+            comparison2(choice)