# To run streamlit, go to terminal and type: 'streamlit run app.py'
# Core Packages ###########################
import os
import shutil
from datetime import datetime
import docx2txt
import PyPDF2
import streamlit as st
import pandas as pd
from model import BertLightningModel
import openai
import base64
#######################################################################################################################
# ---- app-level constants --------------------------------------------------
# Absolute directory of this script; used to resolve the model checkpoint and
# the bundled example essays regardless of the current working directory.
current_path = os.path.abspath(os.path.dirname(__file__))
project_title = "Auto-Grader"
project_desc = "The Auto-Grader app is a tool that uses natural language processing and machine learning algorithms to automatically grade essays. " \
"This app uses Microsoft's Deberta v3-large model to evaluate essays using 6 criterions: cohesion, syntax, vocabulary, phraseology, grammar, and convention."
# Page icon image file; also displayed in the page header by main().
project_icon = "46_Knowledge-white4.png"
project_link = "https://huggingface.co/microsoft/deberta-v3-large \n https://www.kaggle.com/code/yasufuminakama/fb3-deberta-v3-base-baseline-train/notebook"
# NOTE: st.set_page_config must be the first Streamlit command executed.
st.set_page_config(page_title=project_title, initial_sidebar_state='collapsed',page_icon=project_icon)
# additional info from the readme (currently unused; consumed only by the
# commented-out expander in main())
add_info_md = """
"""
#######################################################################################################################
@st.cache(allow_output_mutation=True)  # NOTE(review): st.cache is deprecated in newer Streamlit; consider st.cache_resource — confirm installed version first
def load_model():
    """Load the fine-tuned DeBERTa-v3-large grading model from the local checkpoint.

    The checkpoint is loaded onto CPU. The config dict mirrors the
    training-time hyperparameters the Lightning module expects.
    """
    # Training-time hyperparameters; only needed so the checkpoint loader can
    # reconstruct the module with the same settings it was saved with.
    config = {
        "model_name": "microsoft/deberta-v3-large",
        "num_classes": 6,
        "lr": 2e-5,
        "batch_size": 8,
        "num_workers": 8,
        "max_length": 512,
        "weight_decay": 0.01,
        "accelerator": 'gpu',
        "max_epochs": 5,
        "accumulate_grad_batches": 4,
        "precision": 16,
        "gradient_clip_val": 1000,
        "train_size": 0.8,
        "num_cross_val_splits": 5,
        "num_frozen_layers": 20,  # out of 24 in deberta
    }
    checkpoint_path = os.path.join(current_path, 'tf_model.ckpt')
    return BertLightningModel.load_from_checkpoint(checkpoint_path, config=config, map_location='cpu')
def predict(_input, _model):
tokens = _model.tokenizer([_input], return_tensors='pt')
outputs = _model(tokens)[0].tolist()
df = pd.DataFrame({
'Criterion': ['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar', 'conventions'],
'Grade': outputs
})
return df
def convert_ave_to_score_range(score, max_score, min_score):
    """Linearly map a model score onto the user's score range.

    The model grades each criterion on a nominal 1-4 scale, so a score of 1
    maps to min_score and 4 maps to max_score (hence the division by 3).

    Renamed parameters from ``max``/``min``, which shadowed the builtins; all
    call sites in this file pass them positionally, so callers are unaffected.
    """
    return (score - 1) * ((max_score - min_score) / 3) + min_score
def run_model(answer, min_score, max_score):
    """Grade one essay and map its average criterion score onto [min_score, max_score].

    Returns:
        (evaluation, final_grade): the per-criterion DataFrame from predict()
        and the rounded, clamped final grade.
    """
    st.write('Grading essay..')
    evaluation = predict(answer, st.session_state['model'])
    # Average the six criterion scores, then rescale to the requested range.
    ave = evaluation['Grade'].mean()
    grade = round(convert_ave_to_score_range(ave, max_score, min_score))
    # Clamp to both ends of the range: model outputs can drift outside the
    # nominal 1-4 band, and the original code only capped at max_score,
    # letting grades below min_score through.
    final_grade = min(max_score, max(min_score, grade))
    return evaluation, final_grade
def run_model_on_list(answers, min_score, max_score):
    """Grade a list of essays; see run_model() for the per-essay logic.

    Returns:
        (evaluations, final_grades): parallel lists of per-criterion
        DataFrames and rounded, clamped final grades.
    """
    evaluations = []
    final_grades = []
    # enumerate replaces answers.index(answer), which was O(n^2) and — worse —
    # returned the FIRST occurrence for duplicate essays, mis-numbering the
    # progress messages and re-reading the wrong evaluation.
    for idx, answer in enumerate(answers):
        st.write(f'Grading essay #{idx + 1}..')
        evaluation = predict(answer, st.session_state['model'])
        evaluations.append(evaluation)
        grade = round(convert_ave_to_score_range(evaluation['Grade'].mean(), max_score, min_score))
        # Clamp to both ends of the range (original only capped at max_score).
        final_grades.append(min(max_score, max(min_score, grade)))
    return evaluations, final_grades
def read_pdf(file):
    """Return the concatenated extracted text of every page in a PDF file object."""
    reader = PyPDF2.PdfReader(file)
    return "".join(page.extract_text() for page in reader.pages)
def openai_chat(prompt, model, max_tokens):
    """Send a single completion request to OpenAI and return the reply text.

    Returns the stripped text of the first (only) choice.
    """
    request_args = dict(
        engine=model,
        prompt=prompt,
        max_tokens=max_tokens,
        n=1,          # one completion per request
        stop=None,
        temperature=0.7,
    )
    response = openai.Completion.create(**request_args)
    return response.choices[0].text.strip()
def run_chatgpt(essay_list, min_score, max_score):
    """Ask OpenAI to evaluate each essay and render prompts/responses in the UI.

    Sends one independent completion request per essay and displays all
    prompts and all responses in two read-only text areas.

    Returns:
        str: the combined response text shown in the response text area
        (used by the caller to build a download link).
    """
    st.markdown("***")
    st.subheader("ChatGPT Evaluation")
    openai.api_key = os.environ["JOSHUA_FREEEDU_OPENAI_API_KEY"]
    instructions = f"Evaluate the following essay using the Criterion: [cohesion, syntax, vocabulary, phraseology, grammar, conventions]. " \
                   f"Use a {min_score} to {max_score} score range for each, and provide one final score using the same score range. " \
                   f"Give some explanation for each score on each criteria, and one summarized feedback on the whole essay.\n"
    chatgpt_prompts = []
    chatgpt_responses = []
    for answer in essay_list:
        # Bug fix: previously only the first essay's prompt contained the
        # grading instructions; every later essay was sent as bare text
        # ("Essay: \n" + answer). Each Completion.create call is a separate
        # conversation, so every prompt needs the full instructions.
        prompt = instructions + "\nEssay: \n" + answer
        response = openai_chat(prompt=prompt, model="text-davinci-003", max_tokens=1024)
        # response = openai_chat(prompt=prompt, model="text-curie-001", max_tokens=1024)
        chatgpt_prompts.append(prompt)
        chatgpt_responses.append(response)
    chatgpt_prompt_val = "".join(p + "\n" for p in chatgpt_prompts)
    chatgpt_response_val = "".join(r + "\n" for r in chatgpt_responses)
    st.text_area("ChatGPT Prompt",
                 placeholder="Prompt used on ChatGPT will display here.",
                 value=chatgpt_prompt_val, height=500, disabled=True)
    chatgpt_response_ta = st.text_area("ChatGPT Response",
                                       placeholder="ChatGPT's evaluations will display here.",
                                       value=chatgpt_response_val, height=500, disabled=True)
    return chatgpt_response_ta
def main():
    """Render the Auto-Grader page: header, upload/grade workflow, and examples."""
    # ---- page header -------------------------------------------------------
    head_col = st.columns([1, 8])
    with head_col[0]:
        st.image(project_icon)
    with head_col[1]:
        st.title(project_title)
    st.write(project_desc)
    st.write(f"Source Project: {project_link}")
    # expander = st.expander("Additional Information")
    # expander.markdown(add_info_md)
    st.markdown("***")
    st.subheader("")
    # ---- instructions ------------------------------------------------------
    st.subheader("How to use: ")
    st.write("1a. Input your essay in the text box; or \n\n"
             "1b. Click on Upload Files to submit one or multiple essays saved in doc, docx, or txt format.")
    st.write("2. Click on \'Grade Essay\' button to run the model.")
    # ---- file upload -------------------------------------------------------
    uploaded_files = st.file_uploader('Upload Files', accept_multiple_files=True, type=['docx', 'txt', 'pdf'])
    essays = []       # essay text extracted from each uploaded file
    filenames = []    # file names; used in the final output dataframe
    ta_val = ""       # preview value for the essay text area
    upload_flag = False
    eval_flag = False
    st.session_state['model'] = load_model()
    # If file(s) were uploaded, the text area becomes a read-only preview.
    if uploaded_files:
        upload_flag = True
        # Create a fresh temp folder for the uploaded files.
        if os.path.exists("temp"):
            shutil.rmtree("temp")
        os.makedirs("temp")
        # enumerate replaces uploaded_files.index(...), which returned the
        # first match and mis-numbered duplicate file names.
        for idx, uploaded_file in enumerate(uploaded_files):
            contents = ""
            filenames.append(uploaded_file.name)
            # Save the upload to disk so docx2txt can read it by path.
            with open(os.path.join("temp", uploaded_file.name), "wb") as f:
                f.write(uploaded_file.read())
            # Parse according to the file extension: docx, pdf, or txt.
            extension = uploaded_file.name.split(".")[-1]
            if extension == "docx":
                contents += docx2txt.process(os.path.join("temp", uploaded_file.name)) + "\n"
            elif extension == "pdf":
                contents += read_pdf(uploaded_file) + "\n"
            else:  # txt files
                for line in uploaded_file.getvalue().decode().splitlines():
                    contents += line + "\n"
            essays.append(contents)
            # Preview all essays; prefix an index when more than one file was uploaded.
            ta_val += f"[{idx}]\n" + contents + "\n" if len(uploaded_files) > 1 else contents
        shutil.rmtree("temp")
    # ---- grading controls --------------------------------------------------
    response_ta = st.text_area("Essay:", placeholder="Input your essay here.", height=500,
                               value=ta_val, disabled=upload_flag)
    col1, col2, col3 = st.columns(3)
    min_score = col1.number_input('Minimum Score', 0, 100, 0)
    max_score = col2.number_input('Maximum Score', 0, 100, 10)
    run_button = st.button("Grade Essay")
    enable_chatgpt = st.checkbox("Evaluate with ChatGPT?", help="Works best on one essay at a time.")
    if run_button:
        if not response_ta:  # the text area is empty
            st.error("Please input the essay in the corresponding text area.")
        elif min_score >= max_score:
            st.error("Minimum score must be less than maximum score.")
        else:  # run the model
            final_df = None
            if not upload_flag:
                # Single essay typed directly into the text area.
                eval_df, score = run_model(answer=response_ta, min_score=min_score, max_score=max_score)
                msg = f"Your essay score is: {score} (Minimum Possible Score: {min_score} | Maximum Possible Score: {max_score})"
                st.write(msg)
                st.write("Score breakdown (1-4):")
                st.dataframe(eval_df)
            else:
                # 'evals' is a list of dataframes, 'scores' a list of final grades.
                evals, scores = run_model_on_list(essays, min_score, max_score)
                grades_df = pd.DataFrame({'Filename': filenames, 'Final Grade': scores})
                st.write("Grading done!")
                st.dataframe(grades_df)
                st.write("Criteria are graded within the range of 1-4. \nMerging grades with evaluations..")
                # Tag each evaluation with its file name so the frames can be merged.
                for i, fname in enumerate(filenames):
                    evals[i]['Filename'] = fname
                evals_df = pd.concat(evals)
                final_df = grades_df.merge(evals_df, on='Filename')
                st.dataframe(final_df)
                eval_flag = True
                st.session_state["final_df"] = final_df
            # ---- ChatGPT evaluation section --------------------------------
            chatgpt_response = None
            if enable_chatgpt:
                # For a typed essay 'essays' is empty, so fall back to the text
                # area content; previously ChatGPT silently evaluated nothing
                # in that case.
                chatgpt_response = run_chatgpt(essays if upload_flag else [response_ta], min_score, max_score)
            if eval_flag:
                # Download links rather than st.download_button: links don't
                # refresh the page after clicking.
                downloadfile = final_df.to_csv().encode('utf-8')
                curr_time = datetime.now().strftime("%b-%d-%Y %H:%M:%S")
                b64 = base64.b64encode(downloadfile).decode()
                # NOTE(review): the anchor markup appears stripped in the
                # original source (b64/curr_time were computed but unused);
                # reconstructed as a data-URI link — confirm filename format.
                download_link = f'<a href="data:text/csv;base64,{b64}" download="aes_result_{curr_time}.csv">Download results</a>'
                st.markdown(download_link, unsafe_allow_html=True)
                if enable_chatgpt and chatgpt_response:
                    b64 = base64.b64encode(chatgpt_response.encode()).decode()
                    chatgpt_download_link = f'<a href="data:text/plain;base64,{b64}" download="chatgpt_feedback_{curr_time}.txt">Download ChatGPT Feedback</a>'
                    st.markdown(chatgpt_download_link, unsafe_allow_html=True)
    ###################################################################################################################
    # ---- examples section --------------------------------------------------
    st.subheader("")
    st.markdown("***")
    st.subheader("")
    st.subheader("Here are a few example essays:")
    # Use the absolute examples dir everywhere; the original opened docx/pdf
    # examples via the cwd-relative "examples" path, which breaks when the app
    # is launched from another directory. Files are now opened on demand with
    # context managers instead of keeping a dict of never-closed handles.
    examples_dir = os.path.join(current_path, 'examples')
    examples_fnames = list(os.listdir(examples_dir))
    selected_example = st.multiselect('Select an example essay:', examples_fnames)
    ex_names = []
    ex_essays = []
    ta_val_ex = ""
    for idx, example in enumerate(selected_example):
        contents_ex = ""
        ex_names.append(example)
        example_path = os.path.join(examples_dir, example)
        extension = example.split(".")[-1]
        if extension == "docx":
            contents_ex += docx2txt.process(example_path) + "\n"
        elif extension == "pdf":
            with open(example_path, 'rb') as pdf_file:
                contents_ex += read_pdf(pdf_file) + "\n"
        else:  # txt files
            with open(example_path, 'rb') as txt_file:
                for line in txt_file.read().decode().splitlines():
                    contents_ex += line + "\n"
        ex_essays.append(contents_ex)
        # Preview all selected essays; prefix an index when more than one.
        ta_val_ex += f"[{idx}]\n" + contents_ex + "\n" if len(selected_example) > 1 else contents_ex
    # widgets and button to run on examples
    response_ta_ex = st.text_area("Essay/s:", placeholder="Your selected example essay/s will display here.",
                                  value=ta_val_ex, key='response_ta_ex', height=500, disabled=True)
    col1_ex, col2_ex, col3_ex = st.columns(3)
    min_score_ex = col1_ex.number_input('Minimum Score', 0, 100, 0, key='min_score_ex')
    max_score_ex = col2_ex.number_input('Maximum Score', 0, 100, 10, key='max_score_ex')
    run_button_ex = st.button("Grade Example Essay/s")
    enable_chatgpt_ex = st.checkbox("Evaluate example with ChatGPT?", help="Works best on one essay at a time.")
    if run_button_ex:
        if not response_ta_ex:  # no example selected
            st.error("Please input the essay in their corresponding text area.")
        # elif (was a bare `if` in the original, so the model still ran — and
        # crashed on an empty essay list — when nothing was selected).
        elif min_score_ex >= max_score_ex:
            st.error("Minimum score must be less than maximum score.")
        else:  # run the model
            evals_ex, scores_ex = run_model_on_list(ex_essays, min_score_ex, max_score_ex)
            grades_df_ex = pd.DataFrame({'Filename': ex_names, 'Final Grade': scores_ex})
            st.write("Grading done!")
            st.dataframe(grades_df_ex)
            st.write("Criteria are graded within the range of 1-4. \nMerging grades with evaluations..")
            for i, fname in enumerate(ex_names):
                evals_ex[i]['Filename'] = fname
            evals_df_ex = pd.concat(evals_ex)
            final_df_ex = grades_df_ex.merge(evals_df_ex, on='Filename')
            st.dataframe(final_df_ex)
            # ChatGPT Evaluation Section
            if enable_chatgpt_ex:
                run_chatgpt(ex_essays, min_score_ex, max_score_ex)
# Script entry point. Launch via: streamlit run <this file>
if __name__ == '__main__':
    main()
# To run streamlit, go to terminal and type: 'streamlit run app-source.py'