import streamlit as st
import scattertext as stx
import spacy
import pandas as pd
import en_core_web_md


# load language model:
# One call is enough: spacy.load("en_core_web_md") and en_core_web_md.load()
# yield the same pipeline, so calling both only doubles the start-up cost.
# NOTE(review): Streamlit re-executes this script on every interaction, so
# this load runs on each rerun — consider caching it if start-up is slow.
nlp = spacy.load("en_core_web_md")

# config
# Page heading shown at the top of the app.
st.title("Scattertext Analysis")
# TODO: update other web settings

# Column holding the abstract text in each supported database export.
# CSV exports (Scopus / Lens) name it "Abstract"; tab-separated TXT exports
# (Web of Science) use the field tag "AB".
_ABSTRACT_COLUMN_BY_KIND = {"csv": "Abstract", "txt": "AB"}


def _file_kind(filename):
    """Return "csv" or "txt" for a supported upload name, otherwise None."""
    lowered = filename.lower()
    for kind in _ABSTRACT_COLUMN_BY_KIND:
        if lowered.endswith("." + kind):
            return kind
    return None


def _read_uploaded_table(uploaded, kind):
    """Parse the uploaded file into a DataFrame.

    TXT uploads are assumed to be tab-separated.
    """
    # The script is re-executed on every widget interaction; rewind so a
    # rerun does not try to parse an already-consumed buffer.
    uploaded.seek(0)
    if kind == "csv":
        return pd.read_csv(uploaded)
    return pd.read_table(uploaded, sep="\t")


def _choose_customized_columns(df):
    """Let the user pick the text and category columns of a custom file.

    Returns a ``(category_col, text_col)`` tuple, or None when the table does
    not have enough columns to choose from.
    """
    columns = list(df.columns)
    if len(columns) < 2:
        st.warning("The file needs at least two columns: one with text and one with categories.")
        return None
    text_col = st.selectbox("Choose text column for analysis", columns)
    # The category column must differ from the text column.
    category_col = st.selectbox(
        "Choose category column",
        [col for col in columns if col != text_col],
    )
    return category_col, text_col


def _choose_database_columns(df, abstract_col):
    """Let the user pick a category column; the text is always the abstract.

    Returns a ``(category_col, text_col)`` tuple, or None when the export does
    not contain the expected abstract column or has no other column.
    """
    if abstract_col not in df.columns:
        st.error(f"Expected a '{abstract_col}' column in this export, but it was not found.")
        return None
    # Offer only valid choices instead of rejecting the abstract afterwards.
    candidates = [col for col in df.columns if col != abstract_col]
    if not candidates:
        st.warning("The file has no column besides the abstract to use as category.")
        return None
    category_col = st.selectbox(
        "Choose category column (any column besides the abstract)",
        candidates,
    )
    return category_col, abstract_col


def _render_explorer(df, category_col, text_col):
    """Build a Scattertext corpus from *df* and embed the explorer plot.

    *category_col* and *text_col* are column names of *df*. Rows missing
    either value are dropped. The user picks which category value is
    contrasted against all the others.
    """
    data = df[[category_col, text_col]].dropna().reset_index(drop=True)
    # Scattertext expects string categories and spaCy expects string text.
    data = data.astype(str)

    categories = sorted(data[category_col].unique())
    if len(categories) < 2:
        st.warning("The category column needs at least two distinct values to compare.")
        return

    target_category = st.selectbox("Choose the category to compare against the rest", categories)
    # customize parameters
    customize_category_name = st.text_input('Enter the category name')
    customize_non_category_name = st.text_input('Enter the non-category name')

    # Parsing every document with spaCy is slow, so only do it on request
    # rather than on every rerun triggered by the widgets above.
    if not st.button("Generate plot"):
        return

    with st.spinner("Building corpus and plot..."):
        # convert to scattertext corpus
        corpus = stx.CorpusFromPandas(
            data,
            category_col=category_col,
            text_col=text_col,
            nlp=nlp,
        ).build()
        # create visualization
        html = stx.produce_scattertext_explorer(
            corpus,
            category=target_category,
            # Fall back to sensible labels when the inputs are left empty.
            category_name=customize_category_name or target_category,
            not_category_name=customize_non_category_name or f"Not {target_category}",
            width_in_pixels=1000,
            minimum_term_frequency=0,
            # One label per document, shown next to each excerpt.
            metadata=data[category_col],
        )
    # The default component height would clip the plot, so give it room and
    # allow scrolling.
    st.components.v1.html(html, height=800, scrolling=True)


# upload file
uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])
# read data
if uploaded_file is None:
    st.write("Please upload a CSV or TXT file to begin.")
else:
    file_kind = _file_kind(uploaded_file.name)
    df = None
    if file_kind is None:
        st.error("Unsupported file format.")
    else:
        try:
            df = _read_uploaded_table(uploaded_file, file_kind)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            st.error(f"Could not read the uploaded file: {exc}")

    if df is not None:
        # choose function
        # A selectbox keeps its value across reruns, unlike a button, which
        # is only True for the single run right after the click.
        function_choice = st.selectbox(
            'Choose file source',
            ['Choose...', 'Customized', 'Download from Online Databases'],
        )

        selection = None
        # function1: generate plot from customized file
        if function_choice == 'Customized':
            selection = _choose_customized_columns(df)
        # function2: generate plot from databases (analyzes the abstract)
        elif function_choice == 'Download from Online Databases':
            selection = _choose_database_columns(df, _ABSTRACT_COLUMN_BY_KIND[file_kind])

        if selection is not None:
            chosen_category_col, chosen_text_col = selection
            _render_explorer(df, chosen_category_col, chosen_text_col)