# To run streamlit, go to terminal and type: 'streamlit run app.py'
# Core Packages ###########################
import os
import shutil
from datetime import datetime
import docx2txt
import PyPDF2
import streamlit as st
import pandas as pd
from model import BertLightningModel
import openai
import base64
#######################################################################################################################
# ---- app-level constants --------------------------------------------------
# Absolute directory of this script; used to resolve the model checkpoint and
# the bundled example essays regardless of the current working directory.
current_path = os.path.abspath(os.path.dirname(__file__))
project_title = "Auto-Grader"
project_desc = "The Auto-Grader app is a tool that uses natural language processing and machine learning algorithms to automatically grade essays. " \
"This app uses Microsoft's Deberta v3-large model to evaluate essays using 6 criterions: cohesion, syntax, vocabulary, phraseology, grammar, and convention."
# Page icon image file; also displayed in the page header by main().
project_icon = "46_Knowledge-white4.png"
project_link = "https://huggingface.co/microsoft/deberta-v3-large \n https://www.kaggle.com/code/yasufuminakama/fb3-deberta-v3-base-baseline-train/notebook"
# NOTE: st.set_page_config must be the first Streamlit command executed.
st.set_page_config(page_title=project_title, initial_sidebar_state='collapsed',page_icon=project_icon)
# additional info from the readme (currently unused; consumed only by the
# commented-out expander in main())
add_info_md = """
"""
#######################################################################################################################
@st.cache(allow_output_mutation=True)  # NOTE(review): st.cache is deprecated in newer Streamlit; consider st.cache_resource — confirm installed version first
def load_model():
    """Load the fine-tuned DeBERTa-v3-large grading model from the local checkpoint.

    The checkpoint is loaded onto CPU. The config dict mirrors the
    training-time hyperparameters the Lightning module expects.
    """
    # Training-time hyperparameters; only needed so the checkpoint loader can
    # reconstruct the module with the same settings it was saved with.
    config = {
        "model_name": "microsoft/deberta-v3-large",
        "num_classes": 6,
        "lr": 2e-5,
        "batch_size": 8,
        "num_workers": 8,
        "max_length": 512,
        "weight_decay": 0.01,
        "accelerator": 'gpu',
        "max_epochs": 5,
        "accumulate_grad_batches": 4,
        "precision": 16,
        "gradient_clip_val": 1000,
        "train_size": 0.8,
        "num_cross_val_splits": 5,
        "num_frozen_layers": 20,  # out of 24 in deberta
    }
    checkpoint_path = os.path.join(current_path, 'tf_model.ckpt')
    return BertLightningModel.load_from_checkpoint(checkpoint_path, config=config, map_location='cpu')
def predict(_input, _model):
tokens = _model.tokenizer([_input], return_tensors='pt')
outputs = _model(tokens)[0].tolist()
df = pd.DataFrame({
'Criterion': ['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar', 'conventions'],
'Grade': outputs
})
return df
def convert_ave_to_score_range(score, max_score, min_score):
    """Linearly map a model score onto the user's score range.

    The model grades each criterion on a nominal 1-4 scale, so a score of 1
    maps to min_score and 4 maps to max_score (hence the division by 3).

    Renamed parameters from ``max``/``min``, which shadowed the builtins; all
    call sites in this file pass them positionally, so callers are unaffected.
    """
    return (score - 1) * ((max_score - min_score) / 3) + min_score
def run_model(answer, min_score, max_score):
    """Grade one essay and map its average criterion score onto [min_score, max_score].

    Returns:
        (evaluation, final_grade): the per-criterion DataFrame from predict()
        and the rounded, clamped final grade.
    """
    st.write('Grading essay..')
    evaluation = predict(answer, st.session_state['model'])
    # Average the six criterion scores, then rescale to the requested range.
    ave = evaluation['Grade'].mean()
    grade = round(convert_ave_to_score_range(ave, max_score, min_score))
    # Clamp to both ends of the range: model outputs can drift outside the
    # nominal 1-4 band, and the original code only capped at max_score,
    # letting grades below min_score through.
    final_grade = min(max_score, max(min_score, grade))
    return evaluation, final_grade
def run_model_on_list(answers, min_score, max_score):
    """Grade a list of essays; see run_model() for the per-essay logic.

    Returns:
        (evaluations, final_grades): parallel lists of per-criterion
        DataFrames and rounded, clamped final grades.
    """
    evaluations = []
    final_grades = []
    # enumerate replaces answers.index(answer), which was O(n^2) and — worse —
    # returned the FIRST occurrence for duplicate essays, mis-numbering the
    # progress messages and re-reading the wrong evaluation.
    for idx, answer in enumerate(answers):
        st.write(f'Grading essay #{idx + 1}..')
        evaluation = predict(answer, st.session_state['model'])
        evaluations.append(evaluation)
        grade = round(convert_ave_to_score_range(evaluation['Grade'].mean(), max_score, min_score))
        # Clamp to both ends of the range (original only capped at max_score).
        final_grades.append(min(max_score, max(min_score, grade)))
    return evaluations, final_grades
def read_pdf(file):
    """Return the concatenated extracted text of every page in a PDF file object."""
    reader = PyPDF2.PdfReader(file)
    return "".join(page.extract_text() for page in reader.pages)
def openai_chat(prompt, model, max_tokens):
    """Send a single completion request to OpenAI and return the reply text.

    Returns the stripped text of the first (only) choice.
    """
    request_args = dict(
        engine=model,
        prompt=prompt,
        max_tokens=max_tokens,
        n=1,          # one completion per request
        stop=None,
        temperature=0.7,
    )
    response = openai.Completion.create(**request_args)
    return response.choices[0].text.strip()
def run_chatgpt(essay_list, min_score, max_score):
    """Ask OpenAI to evaluate each essay and render prompts/responses in the UI.

    Sends one independent completion request per essay and displays all
    prompts and all responses in two read-only text areas.

    Returns:
        str: the combined response text shown in the response text area
        (used by the caller to build a download link).
    """
    st.markdown("***")
    st.subheader("ChatGPT Evaluation")
    openai.api_key = os.environ["JOSHUA_FREEEDU_OPENAI_API_KEY"]
    instructions = f"Evaluate the following essay using the Criterion: [cohesion, syntax, vocabulary, phraseology, grammar, conventions]. " \
                   f"Use a {min_score} to {max_score} score range for each, and provide one final score using the same score range. " \
                   f"Give some explanation for each score on each criteria, and one summarized feedback on the whole essay.\n"
    chatgpt_prompts = []
    chatgpt_responses = []
    for answer in essay_list:
        # Bug fix: previously only the first essay's prompt contained the
        # grading instructions; every later essay was sent as bare text
        # ("Essay: \n" + answer). Each Completion.create call is a separate
        # conversation, so every prompt needs the full instructions.
        prompt = instructions + "\nEssay: \n" + answer
        response = openai_chat(prompt=prompt, model="text-davinci-003", max_tokens=1024)
        # response = openai_chat(prompt=prompt, model="text-curie-001", max_tokens=1024)
        chatgpt_prompts.append(prompt)
        chatgpt_responses.append(response)
    chatgpt_prompt_val = "".join(p + "\n" for p in chatgpt_prompts)
    chatgpt_response_val = "".join(r + "\n" for r in chatgpt_responses)
    st.text_area("ChatGPT Prompt",
                 placeholder="Prompt used on ChatGPT will display here.",
                 value=chatgpt_prompt_val, height=500, disabled=True)
    chatgpt_response_ta = st.text_area("ChatGPT Response",
                                       placeholder="ChatGPT's evaluations will display here.",
                                       value=chatgpt_response_val, height=500, disabled=True)
    return chatgpt_response_ta
def main():
    """Render the Auto-Grader page: header, upload/grade workflow, and examples."""
    # ---- page header -------------------------------------------------------
    head_col = st.columns([1, 8])
    with head_col[0]:
        st.image(project_icon)
    with head_col[1]:
        st.title(project_title)
    st.write(project_desc)
    st.write(f"Source Project: {project_link}")
    # expander = st.expander("Additional Information")
    # expander.markdown(add_info_md)
    st.markdown("***")
    st.subheader("")
    # ---- instructions ------------------------------------------------------
    st.subheader("How to use: ")
    st.write("1a. Input your essay in the text box; or \n\n"
             "1b. Click on Upload Files to submit one or multiple essays saved in doc, docx, or txt format.")
    st.write("2. Click on \'Grade Essay\' button to run the model.")
    # ---- file upload -------------------------------------------------------
    uploaded_files = st.file_uploader('Upload Files', accept_multiple_files=True, type=['docx', 'txt', 'pdf'])
    essays = []       # essay text extracted from each uploaded file
    filenames = []    # file names; used in the final output dataframe
    ta_val = ""       # preview value for the essay text area
    upload_flag = False
    eval_flag = False
    st.session_state['model'] = load_model()
    # If file(s) were uploaded, the text area becomes a read-only preview.
    if uploaded_files:
        upload_flag = True
        # Create a fresh temp folder for the uploaded files.
        if os.path.exists("temp"):
            shutil.rmtree("temp")
        os.makedirs("temp")
        # enumerate replaces uploaded_files.index(...), which returned the
        # first match and mis-numbered duplicate file names.
        for idx, uploaded_file in enumerate(uploaded_files):
            contents = ""
            filenames.append(uploaded_file.name)
            # Save the upload to disk so docx2txt can read it by path.
            with open(os.path.join("temp", uploaded_file.name), "wb") as f:
                f.write(uploaded_file.read())
            # Parse according to the file extension: docx, pdf, or txt.
            extension = uploaded_file.name.split(".")[-1]
            if extension == "docx":
                contents += docx2txt.process(os.path.join("temp", uploaded_file.name)) + "\n"
            elif extension == "pdf":
                contents += read_pdf(uploaded_file) + "\n"
            else:  # txt files
                for line in uploaded_file.getvalue().decode().splitlines():
                    contents += line + "\n"
            essays.append(contents)
            # Preview all essays; prefix an index when more than one file was uploaded.
            ta_val += f"[{idx}]\n" + contents + "\n" if len(uploaded_files) > 1 else contents
        shutil.rmtree("temp")
    # ---- grading controls --------------------------------------------------
    response_ta = st.text_area("Essay:", placeholder="Input your essay here.", height=500,
                               value=ta_val, disabled=upload_flag)
    col1, col2, col3 = st.columns(3)
    min_score = col1.number_input('Minimum Score', 0, 100, 0)
    max_score = col2.number_input('Maximum Score', 0, 100, 10)
    run_button = st.button("Grade Essay")
    enable_chatgpt = st.checkbox("Evaluate with ChatGPT?", help="Works best on one essay at a time.")
    if run_button:
        if not response_ta:  # the text area is empty
            st.error("Please input the essay in the corresponding text area.")
        elif min_score >= max_score:
            st.error("Minimum score must be less than maximum score.")
        else:  # run the model
            final_df = None
            if not upload_flag:
                # Single essay typed directly into the text area.
                eval_df, score = run_model(answer=response_ta, min_score=min_score, max_score=max_score)
                msg = f"Your essay score is: {score} (Minimum Possible Score: {min_score} | Maximum Possible Score: {max_score})"
                st.write(msg)
                st.write("Score breakdown (1-4):")
                st.dataframe(eval_df)
            else:
                # 'evals' is a list of dataframes, 'scores' a list of final grades.
                evals, scores = run_model_on_list(essays, min_score, max_score)
                grades_df = pd.DataFrame({'Filename': filenames, 'Final Grade': scores})
                st.write("Grading done!")
                st.dataframe(grades_df)
                st.write("Criteria are graded within the range of 1-4. \nMerging grades with evaluations..")
                # Tag each evaluation with its file name so the frames can be merged.
                for i, fname in enumerate(filenames):
                    evals[i]['Filename'] = fname
                evals_df = pd.concat(evals)
                final_df = grades_df.merge(evals_df, on='Filename')
                st.dataframe(final_df)
                eval_flag = True
                st.session_state["final_df"] = final_df
            # ---- ChatGPT evaluation section --------------------------------
            chatgpt_response = None
            if enable_chatgpt:
                # For a typed essay 'essays' is empty, so fall back to the text
                # area content; previously ChatGPT silently evaluated nothing
                # in that case.
                chatgpt_response = run_chatgpt(essays if upload_flag else [response_ta], min_score, max_score)
            if eval_flag:
                # Download links rather than st.download_button: links don't
                # refresh the page after clicking.
                downloadfile = final_df.to_csv().encode('utf-8')
                curr_time = datetime.now().strftime("%b-%d-%Y %H:%M:%S")
                b64 = base64.b64encode(downloadfile).decode()
                # NOTE(review): the anchor markup appears stripped in the
                # original source (b64/curr_time were computed but unused);
                # reconstructed as a data-URI link — confirm filename format.
                download_link = f'<a href="data:text/csv;base64,{b64}" download="aes_result_{curr_time}.csv">Download results</a>'
                st.markdown(download_link, unsafe_allow_html=True)
                if enable_chatgpt and chatgpt_response:
                    b64 = base64.b64encode(chatgpt_response.encode()).decode()
                    chatgpt_download_link = f'<a href="data:text/plain;base64,{b64}" download="chatgpt_feedback_{curr_time}.txt">Download ChatGPT Feedback</a>'
                    st.markdown(chatgpt_download_link, unsafe_allow_html=True)
    ###################################################################################################################
    # ---- examples section --------------------------------------------------
    st.subheader("")
    st.markdown("***")
    st.subheader("")
    st.subheader("Here are a few example essays:")
    # Use the absolute examples dir everywhere; the original opened docx/pdf
    # examples via the cwd-relative "examples" path, which breaks when the app
    # is launched from another directory. Files are now opened on demand with
    # context managers instead of keeping a dict of never-closed handles.
    examples_dir = os.path.join(current_path, 'examples')
    examples_fnames = list(os.listdir(examples_dir))
    selected_example = st.multiselect('Select an example essay:', examples_fnames)
    ex_names = []
    ex_essays = []
    ta_val_ex = ""
    for idx, example in enumerate(selected_example):
        contents_ex = ""
        ex_names.append(example)
        example_path = os.path.join(examples_dir, example)
        extension = example.split(".")[-1]
        if extension == "docx":
            contents_ex += docx2txt.process(example_path) + "\n"
        elif extension == "pdf":
            with open(example_path, 'rb') as pdf_file:
                contents_ex += read_pdf(pdf_file) + "\n"
        else:  # txt files
            with open(example_path, 'rb') as txt_file:
                for line in txt_file.read().decode().splitlines():
                    contents_ex += line + "\n"
        ex_essays.append(contents_ex)
        # Preview all selected essays; prefix an index when more than one.
        ta_val_ex += f"[{idx}]\n" + contents_ex + "\n" if len(selected_example) > 1 else contents_ex
    # widgets and button to run on examples
    response_ta_ex = st.text_area("Essay/s:", placeholder="Your selected example essay/s will display here.",
                                  value=ta_val_ex, key='response_ta_ex', height=500, disabled=True)
    col1_ex, col2_ex, col3_ex = st.columns(3)
    min_score_ex = col1_ex.number_input('Minimum Score', 0, 100, 0, key='min_score_ex')
    max_score_ex = col2_ex.number_input('Maximum Score', 0, 100, 10, key='max_score_ex')
    run_button_ex = st.button("Grade Example Essay/s")
    enable_chatgpt_ex = st.checkbox("Evaluate example with ChatGPT?", help="Works best on one essay at a time.")
    if run_button_ex:
        if not response_ta_ex:  # no example selected
            st.error("Please input the essay in their corresponding text area.")
        # elif (was a bare `if` in the original, so the model still ran — and
        # crashed on an empty essay list — when nothing was selected).
        elif min_score_ex >= max_score_ex:
            st.error("Minimum score must be less than maximum score.")
        else:  # run the model
            evals_ex, scores_ex = run_model_on_list(ex_essays, min_score_ex, max_score_ex)
            grades_df_ex = pd.DataFrame({'Filename': ex_names, 'Final Grade': scores_ex})
            st.write("Grading done!")
            st.dataframe(grades_df_ex)
            st.write("Criteria are graded within the range of 1-4. \nMerging grades with evaluations..")
            for i, fname in enumerate(ex_names):
                evals_ex[i]['Filename'] = fname
            evals_df_ex = pd.concat(evals_ex)
            final_df_ex = grades_df_ex.merge(evals_df_ex, on='Filename')
            st.dataframe(final_df_ex)
            # ChatGPT Evaluation Section
            if enable_chatgpt_ex:
                run_chatgpt(ex_essays, min_score_ex, max_score_ex)
# Script entry point. Launch via: streamlit run <this file>
if __name__ == '__main__':
    main()
# To run streamlit, go to terminal and type: 'streamlit run app-source.py'