# Spaces:
# Runtime error
# Runtime error
import copy

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from codeScripts.Dependencies.SentenceTransformer2 import *
from codeScripts.utils import create_file_path, clean_words, save_json
#Done | |
class SemanticOutput():
    """
    Class to store the semantic processing and extract results.

    Every container is split by similarity type, using the keys "spacy" and
    "bert", and then (for most containers) by student id.
    """

    # An identified line is discarded when its similarity differs from the
    # best single-sentence similarity ("1" -> "MaxSimilitud") by more than this.
    _MAX_SIMILARITY_GAP = 0.075

    def __init__(self, settings):
        """
        Creates the empty result containers and reads the configuration.
        Inputs:
            -settings: system settings. The attributes read here are
             UmbralesSimilitud (comma separated threshold intervals),
             indice_minipreguntas and LofRespThreshold.
        """
        # similarity of every evaluated group of sentences
        self.nota_spacy = {"spacy": dict(), "bert": dict()}
        # highest similarity found for each number of grouped sentences
        self.nota_spacy_experimento = {"spacy": dict(), "bert": dict()}
        # single-sentence candidates for the location of the answer
        self.identifyLineofResponse = {"spacy": dict(), "bert": dict()}
        # rows describing the highlighted lines (filled in updateInforms)
        self.identifyLineofResponse_toexcel = []
        # calculated marks, one list per threshold interval label
        self.notas_calculadas = {"spacy": dict(), "bert": dict()}

        self.min_umbral = []
        self.max_umbral = []
        for raw_interval in settings.UmbralesSimilitud.split(","):
            # NOTE(review): clean_words is assumed to return at least four
            # digit strings (integer/decimal part of the low and high
            # threshold) - confirm against codeScripts.utils.
            c_w = clean_words(raw_interval)
            low = c_w[0] + '.' + c_w[1]
            high = c_w[2] + '.' + c_w[3]
            self.min_umbral.append(float(low))
            self.max_umbral.append(float(high))
            label = 'Umbral ' + low + ' - ' + high
            self.notas_calculadas["spacy"][label] = []
            self.notas_calculadas["bert"][label] = []

        # variables taken from the settings
        self.answersDF_json2 = {"spacy": dict(), "bert": dict()}
        self.indiceMinipreguntas = settings.indice_minipreguntas
        self.LofRespThreshold = settings.LofRespThreshold
        # running 1-based label of the single sentence being stored
        self.indx = 1

    def __createDict__(self, nota_spacy: dict, studentID, minipregunta, similarity_type, type=0):
        """
        Makes sure nota_spacy[similarity_type][studentID] exists and (re)sets
        its minipregunta entry to an empty container.
        Inputs:
            -nota_spacy: The dictionary to be updated (modified in place).
            -studentID: The id of the student
            -minipregunta: The minipregunta that is being studied
            -similarity_type: "spacy" or "bert"
            -type: 0 stores an empty list, any other value an empty dict.
             (The name shadows the builtin but is kept for the callers.)
        Returns:
            The same dictionary that was received.
        """
        per_student = nota_spacy[similarity_type].setdefault(studentID, dict())
        per_student[minipregunta] = [] if type == 0 else dict()
        return nota_spacy

    def __plotHistogram__(self, save_file, x):
        """
        Generates an histogram of the given data.
        Inputs:
            save_file: The path where the histogram is to be generated.
            x: The data to be represented.
        """
        ax = sns.histplot(
            data = x,
            stat = "count",
            kde = True,
            color = "black"
        )
        ax.set(xlabel='Deviation', ylabel='Count')
        figure = ax.get_figure()
        figure.savefig(create_file_path(save_file,3))
        del figure
        # clear the axes so that a later plot does not draw over this one
        ax.cla()

    def initInforms(self, studentID, minipregunta, similarity_type):
        """
        This function is for initializing the variables where data is to be stored.
        Inputs:
            studentID: The id of the student
            minipregunta: The minipregunta that is being studied
            similarity_type: "spacy" or "bert"
        """
        # identify where the answer is for each minipregunta
        self.identifyLineofResponse = self.__createDict__(self.identifyLineofResponse, studentID, minipregunta, similarity_type, 1)
        # marks of the evaluation process (reset for the student on every call)
        self.nota_spacy_experimento[similarity_type][studentID] = dict()
        # similarities for each minipregunta
        self.nota_spacy = self.__createDict__(self.nota_spacy, studentID, minipregunta, similarity_type)
        # the lines of the response, stored one by one
        self.answersDF_json2[similarity_type][studentID] = dict()
        self.answersDF_json2[similarity_type][studentID]["respuesta"] = dict()

    def updateInformsBucle(self, studentID, minipregunta, response, response_label, numberOfSentences, similarity, similarity_type, isMaxSimil):
        """
        This function is the previous needed step before using updateInforms. Stores the important iterative-generated information
        Inputs:
            -studentID: The id of the student
            -minipregunta: The minipregunta that is being studied
            -response: The student's response
            -response_label: The generated label that indicates the sentence number of the extracted response in the text.
            -numberOfSentences: The number of splitted sentences.
            -similarity: The obtained similarity score.
            -similarity_type: "spacy" or "bert"
            -isMaxSimil: If the similarity score is the highest obtained at the moment or not.
        """
        # Storing the similarity score obtained for only one sentence
        if numberOfSentences == 1:
            self.identifyLineofResponse[similarity_type][studentID][minipregunta][str(self.indx)] = {
                "Similitud": similarity,
                "Frase": response,
                "Lineas": response_label,
            }
            self.answersDF_json2[similarity_type][studentID]["respuesta"][self.indx] = response
            self.indx += 1
        else:
            # any multi-sentence group restarts the single-sentence counter
            self.indx = 1

        # storing the maximum similarity for each set of sentences length
        if isMaxSimil:
            self.nota_spacy_experimento[similarity_type][studentID][str(numberOfSentences)] = {
                "MaxSimilitud": similarity,
                "Frase": response,
                "Lineas": response_label,
            }

        # storing the similarity in every case
        if response == "":
            record = [response, None, None]
        else:
            record = [response, similarity, response_label]
        self.nota_spacy[similarity_type][studentID][minipregunta].append(record)

    def updateInforms(self, studentID, umbralL, umbralH, calculatedMark, similarity_type, response = ""):
        """
        This function is to store the obtained results from the processing of one response.
        Inputs:
            -studentID: The id of the student
            -umbralL: The fixed low threshold (config json)
            -umbralH: The fixed high threshold (config json)
            -calculatedMark: The calculated mark.
            -similarity_type: "spacy" or "bert"
            -response: The student's response
        For every minipregunta one row is appended to
        identifyLineofResponse_toexcel: [minipregunta, ""] when the response
        is empty, otherwise [minipregunta, rows, marks] where rows/marks list
        (space terminated, highest similarity first) the stored lines whose
        similarity is above LofRespThreshold. Both strings are empty when no
        line is above the threshold.
        """
        # storing calculated marks
        print("indiceMinipreguntas: " + str(self.indiceMinipreguntas))
        interval_label = 'Umbral ' + str(umbralL) + ' - ' + str(umbralH)
        self.notas_calculadas[similarity_type][interval_label].append(
            0 if response == "" else calculatedMark/len(self.indiceMinipreguntas)
        )

        # storing where the model thought the answer was
        for minipregunta in self.indiceMinipreguntas:
            candidates = self.identifyLineofResponse[similarity_type][studentID][minipregunta]

            # Drop the lines that are too far from the best single-sentence
            # similarity. Only the keys are snapshotted (entries are deleted
            # while looping); the best value is looked up inside the loop so
            # nothing is required when there are no candidates.
            for line_id in list(candidates):
                best = self.nota_spacy_experimento[similarity_type][studentID]["1"]["MaxSimilitud"]
                if abs(candidates[line_id]["Similitud"] - best) > self._MAX_SIMILARITY_GAP:
                    del candidates[line_id]

            # Getting the number of the guess
            if response == "":
                self.identifyLineofResponse_toexcel.append([minipregunta, ""])
                continue

            # Only the lines above the threshold are reported. The sort is
            # stable, so lines with equal similarity keep their stored order.
            ranked = sorted(
                (
                    (line_id, entry["Similitud"])
                    for line_id, entry in candidates.items()
                    if entry["Similitud"] > self.LofRespThreshold
                ),
                key=lambda pair: pair[1],
                reverse=True,
            )
            highlightedrows = "".join(str(line_id) + " " for line_id, _ in ranked)
            highlightedmarks = "".join(str(simil) + " " for _, simil in ranked)
            self.identifyLineofResponse_toexcel.append([minipregunta, highlightedrows, highlightedmarks])

    def saveSimilarityResults(self, settings, similarity_type):
        """
        Saves the recopiled data in the corresponding format and path differentiating the types of semantic calculation.
        Inputs:
            -settings: system settings (modelr and epochr are read to build the
             file prefix when similarity_type is not "spacy").
            -similarity_type: "spacy" if similarity is being calculated from Spacy (if it is not, bert is selected)
        """
        if similarity_type == "spacy":
            savePrefix = "Spacy - "
        else:
            savePrefix = str(settings.modelr) + str(settings.epochr) + " - "

        # previous name - "AnalisisSemantico.json"
        save_json(create_file_path(savePrefix + "SimilitudPorConjunto.json",2), self.nota_spacy[similarity_type])
        save_json(create_file_path(savePrefix + "MaxSimilitudPorConjunto.json",2), self.nota_spacy_experimento[similarity_type])
        save_json(create_file_path(savePrefix + "LineaRespuesta.json",2), self.identifyLineofResponse[similarity_type])
        save_json(create_file_path(savePrefix + "RespuestaSeparadaPorFrases.json",2), self.answersDF_json2[similarity_type])

        # one column of calculated marks per threshold interval
        Notasdf = pd.DataFrame()
        for intervaloUmbral, notas in self.notas_calculadas[similarity_type].items():
            Notasdf[intervaloUmbral] = notas
        Notasdf.to_excel(create_file_path(savePrefix +'NotasCalculadas.xlsx',2), sheet_name='notas')
class SintacticOutput():
    """
    Class to store the sintactic processing
    """

    def __init__(self):
        # legibility scores; the two lists are plotted against each other by
        # position, so they are expected to have the same length
        self.leg_FH = []
        self.leg_mu = []

    def saveLegibilityResults(self):
        """
        Saves the recopiled data in the corresponding format.

        Both score lists are written as json and a line chart comparing them
        (x axis: position in the list, labelled "Student") is saved as
        "Img_FHvsMu.png".
        """
        # NOTE(review): the meaning of the trailing False is defined by
        # save_json in codeScripts.utils - not visible from here.
        save_json(create_file_path("FH-Readability.json",2), self.leg_FH, False)
        save_json(create_file_path("mu-Readability.json",2), self.leg_mu, False)

        x = list(range(len(self.leg_FH)))

        figure = plt.figure(figsize=(15,7))
        try:
            plt.plot(x, self.leg_FH, label = "FH", color = (0.1,0.1,0.1))
            plt.plot(x, self.leg_mu, '--', label = "mu", color = (0.5,0.5,0.5))
            plt.xlabel("Student")
            plt.ylabel("Legibility (0-100)")
            plt.legend(loc=1)
            plt.title("FH vs mu")
            plt.xticks(rotation=-45)
            plt.grid()
            plt.savefig(create_file_path("Img_FHvsMu.png",3))
        finally:
            # A new figure is opened on every call; close it so that figures
            # do not pile up in pyplot's registry (clearing the axes alone
            # keeps the figure alive).
            plt.close(figure)
class OrtographicOutput():
    """
    Class to store the ortographic processing
    """

    def __init__(self):
        # generated ortographic marks (the only list written by
        # saveOrtographicResults)
        self.notaOrtografia = []
        # containers filled by the callers; not written to disk by this class
        self.mistakes = []
        self.number_mistakes = []

    def saveOrtographicResults(self):
        """
        Saves the ortographic generated marks.
        """
        # NOTE(review): the meaning of the trailing False is defined by
        # save_json in codeScripts.utils - not visible from here.
        save_json(create_file_path("NotasOrtografia.json",2), self.notaOrtografia, False)