# plentas/codeScripts/rubricsOut.py
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import copy
from codeScripts.Dependencies.SentenceTransformer2 import *
from codeScripts.utils import create_file_path, clean_words, save_json
#Done
class SemanticOutput():
"""
    Class to store the semantic processing data and extract the results.
"""
def __init__(self, settings):
self.nota_spacy = dict()
self.nota_spacy["spacy"] = dict()
self.nota_spacy["bert"] = dict()
self.nota_spacy_experimento = dict()
self.nota_spacy_experimento["spacy"] = dict()
self.nota_spacy_experimento["bert"] = dict()
self.identifyLineofResponse = dict()
self.identifyLineofResponse["spacy"] = dict()
self.identifyLineofResponse["bert"] = dict()
self.identifyLineofResponse_toexcel = []
self.notas_calculadas = dict()
self.notas_calculadas["spacy"] = dict()
self.notas_calculadas["bert"] = dict()
self.min_umbral = []
self.max_umbral = []
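        # UmbralesSimilitud is parsed as comma-separated threshold pairs; clean_words is
        # assumed to return the digit groups of the low and high decimal thresholds of each pair.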
        r = settings.UmbralesSimilitud.split(",")
        for i in r:
            c_w = clean_words(i)
self.min_umbral.append(float(c_w[0]+'.'+c_w[1]))
self.max_umbral.append(float(c_w[2]+'.'+c_w[3]))
self.notas_calculadas["spacy"]['Umbral ' + c_w[0]+'.'+c_w[1] + ' - ' + c_w[2]+'.'+c_w[3]] = []
self.notas_calculadas["bert"]['Umbral ' + c_w[0]+'.'+c_w[1] + ' - ' + c_w[2]+'.'+c_w[3]] = []
#variables taken from the settings
self.answersDF_json2 = dict()
self.answersDF_json2["spacy"] = dict()
self.answersDF_json2["bert"] = dict()
self.indiceMinipreguntas = settings.indice_minipreguntas
self.LofRespThreshold = settings.LofRespThreshold
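        # 1-based counter over the single-sentence fragments of a response;
        # it is advanced and reset inside updateInformsBucle.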
self.indx = 1
    def __createDict__(self, nota_spacy: dict, studentID, minipregunta, similarity_type, type=0):
        """
        Initializes the nested entry for a student and minipregunta.
        With type == 0 the minipregunta entry is a list; otherwise it is a dict.
        """
        if studentID not in nota_spacy[similarity_type].keys():
            nota_spacy[similarity_type][studentID] = dict()
        if type == 0:
            nota_spacy[similarity_type][studentID][minipregunta] = []
        else:
            nota_spacy[similarity_type][studentID][minipregunta] = dict()
return nota_spacy
def __plotHistogram__(self, save_file, x):
"""
        Generates a histogram of the given data.
        Inputs:
            save_file: The path where the histogram will be saved.
            x: The data to be represented.
"""
ax= sns.histplot(
data = x,
stat = "count",
kde = True,
color = "black"
)
ax.set(xlabel='Deviation', ylabel='Count')
figure = ax.get_figure()
figure.savefig(create_file_path(save_file,3))
del figure
ax.cla()
def initInforms(self, studentID, minipregunta, similarity_type):
"""
        Initializes the variables where the data for a student and minipregunta is stored.
        Inputs:
            studentID: The id of the student.
            minipregunta: The minipregunta that is being studied.
            similarity_type: "spacy" or "bert", the similarity model in use.
"""
        #identify where the response to each minipregunta is located
self.identifyLineofResponse = self.__createDict__(self.identifyLineofResponse, studentID, minipregunta, similarity_type, 1)
        #store the marks from the evaluation process
#self.nota_spacy_experimento = self.__createDict__(self.nota_spacy_experimento, studentID, similarity_type, 1)
self.nota_spacy_experimento[similarity_type][studentID] = dict()
        #Store the similarities per minipregunta
self.nota_spacy = self.__createDict__(self.nota_spacy, studentID, minipregunta, similarity_type)
        #split the response and store its sentences one by one
self.answersDF_json2[similarity_type][studentID] = dict()
self.answersDF_json2[similarity_type][studentID]["respuesta"] = dict()
def updateInformsBucle(self, studentID, minipregunta, response, response_label, numberOfSentences, similarity, similarity_type, isMaxSimil):
"""
        Step required before calling updateInforms: stores the information generated at each iteration.
        Inputs:
            -studentID: The id of the student.
            -minipregunta: The minipregunta that is being studied.
            -response: The student's response.
            -response_label: The generated label that indicates the sentence number of the extracted response in the text.
            -numberOfSentences: The number of sentences in the split.
            -similarity: The obtained similarity score.
            -similarity_type: "spacy" or "bert", the similarity model in use.
            -isMaxSimil: Whether the similarity score is the highest obtained so far.
"""
#Storing the similarity score obtained for only one sentence
if numberOfSentences == 1:
self.identifyLineofResponse[similarity_type][studentID][minipregunta][str(self.indx)] = dict()
self.identifyLineofResponse[similarity_type][studentID][minipregunta][str(self.indx)]["Similitud"] = similarity
self.identifyLineofResponse[similarity_type][studentID][minipregunta][str(self.indx)]["Frase"] = response
self.identifyLineofResponse[similarity_type][studentID][minipregunta][str(self.indx)]["Lineas"] = response_label
self.answersDF_json2[similarity_type][studentID]["respuesta"][self.indx] = response
self.indx+=1
else:
self.indx = 1
#storing the maximum similarity for each set of sentences length
if isMaxSimil:
self.nota_spacy_experimento[similarity_type][studentID][str(numberOfSentences)] = dict()
self.nota_spacy_experimento[similarity_type][studentID][str(numberOfSentences)]["MaxSimilitud"] = similarity
self.nota_spacy_experimento[similarity_type][studentID][str(numberOfSentences)]["Frase"] = response
self.nota_spacy_experimento[similarity_type][studentID][str(numberOfSentences)]["Lineas"] = response_label
#storing the similarity in every case
self.nota_spacy[similarity_type][studentID][minipregunta].append([response, None, None] if response == "" else [response, similarity, response_label])
def updateInforms(self, studentID, umbralL, umbralH, calculatedMark, similarity_type, response = ""):
"""
        Stores the results obtained from the processing of one response.
        Inputs:
            -studentID: The id of the student.
            -umbralL: The fixed low threshold (config json).
            -umbralH: The fixed high threshold (config json).
            -calculatedMark: The calculated mark.
            -similarity_type: "spacy" or "bert", the similarity model in use.
            -response: The student's response.
"""
#storing calculated marks
self.notas_calculadas[similarity_type]['Umbral ' + str(umbralL) + ' - ' + str(umbralH)].append(0 if response == "" else calculatedMark/len(self.indiceMinipreguntas))
#storing where the model thought the answer was
for minipregunta in self.indiceMinipreguntas:
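            # Keep only the single-sentence candidates whose similarity is within 0.075
            # of the best single-sentence similarity obtained for this student.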
aux = copy.deepcopy(self.identifyLineofResponse)
for indx in aux[similarity_type][studentID][minipregunta].keys():
if abs(self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"] - self.nota_spacy_experimento[similarity_type][studentID]["1"]["MaxSimilitud"]) > 0.075:
del self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]
#Getting the number of the guess
if response == "":
self.identifyLineofResponse_toexcel.append([minipregunta, ""])
else:
max_n = -999999
indx_queue = 0
queue = []
highlightedrows = ""
highlightedmarks = ""
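                # Greedy selection: repeatedly pick the not-yet-chosen sentence with the
                # highest similarity above LofRespThreshold, recording its index and score.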
                for _ in self.identifyLineofResponse[similarity_type][studentID][minipregunta].keys():
for indx in self.identifyLineofResponse[similarity_type][studentID][minipregunta].keys():
if self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"] > max_n and not indx in queue and self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"]>self.LofRespThreshold:
max_n = self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"]
indx_queue = indx
queue.append(indx_queue)
highlightedrows = highlightedrows + str(indx_queue) + " "
highlightedmarks = highlightedmarks + str(max_n) + " "
max_n = -999999
indx_queue = 0
self.identifyLineofResponse_toexcel.append([minipregunta, highlightedrows, highlightedmarks])
highlightedrows = ""
highlightedmarks = ""
queue = []
def saveSimilarityResults(self, settings, similarity_type):
"""
        Saves the collected data in the corresponding format and path, differentiating between the types of semantic calculation.
        Inputs:
            -settings: system settings.
            -similarity_type: "spacy" if the similarity was computed with spaCy; otherwise "bert" is used.
"""
savePrefix = "Spacy - " if similarity_type == "spacy" else str(settings.modelr) + str(settings.epochr) + " - "
#previous name - "AnalisisSemantico.json"
save_json(create_file_path(savePrefix + "SimilitudPorConjunto.json",2), self.nota_spacy[similarity_type])
save_json(create_file_path(savePrefix + "MaxSimilitudPorConjunto.json",2), self.nota_spacy_experimento[similarity_type])
save_json(create_file_path(savePrefix + "LineaRespuesta.json",2), self.identifyLineofResponse[similarity_type])
save_json(create_file_path(savePrefix + "RespuestaSeparadaPorFrases.json",2), self.answersDF_json2[similarity_type])
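        # One spreadsheet column per threshold interval, holding the mark computed for each response.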
Notasdf = pd.DataFrame()
for intervaloUmbral in self.notas_calculadas[similarity_type]:
Notasdf[intervaloUmbral] = self.notas_calculadas[similarity_type][intervaloUmbral]
Notasdf.to_excel(create_file_path(savePrefix +'NotasCalculadas.xlsx',2), sheet_name='notas')
#self.__plotHistogram__(savePrefix + "HistogramaNotasGeneradas.png", self.notas_calculadas[similarity_type])
class SintacticOutput():
"""
    Class to store the syntactic processing results
"""
def __init__(self):
self.leg_FH =[]
self.leg_mu = []
def saveLegibilityResults(self):
"""
        Saves the collected data in the corresponding format.
"""
save_json(create_file_path("FH-Readability.json",2), self.leg_FH, False)
save_json(create_file_path("mu-Readability.json",2), self.leg_mu, False)
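        # Plot both readability scores against the student index.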
        x = list(range(len(self.leg_FH)))
plt.figure(figsize=(15,7))
plt.plot(x, self.leg_FH, label = "FH", color = (0.1,0.1,0.1))
plt.plot(x, self.leg_mu, '--', label = "mu", color = (0.5,0.5,0.5))
plt.xlabel("Student")
plt.ylabel("Legibility (0-100)")
plt.legend(loc=1)
plt.title("FH vs mu")
plt.xticks(rotation=-45)
plt.grid()
plt.savefig(create_file_path("Img_FHvsMu.png",3))
plt.cla()
class OrtographicOutput():
"""
    Class to store the orthographic processing results
"""
def __init__(self):
self.notaOrtografia = []
self.mistakes = []
self.number_mistakes = []
def saveOrtographicResults(self):
"""
        Saves the generated orthographic marks.
"""
save_json(create_file_path("NotasOrtografia.json",2), self.notaOrtografia, False)
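

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original pipeline).
# It assumes that create_file_path/save_json can write to their target folders,
# and it only exercises the two classes that take no settings object; the
# values appended below are made up for demonstration purposes.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    ortho = OrtographicOutput()
    ortho.notaOrtografia.append(0.75)      # hypothetical orthographic mark
    ortho.saveOrtographicResults()

    sintactic = SintacticOutput()
    sintactic.leg_FH = [55.0, 62.5]        # hypothetical FH readability scores
    sintactic.leg_mu = [48.0, 60.0]        # hypothetical mu readability scores
    sintactic.saveLegibilityResults()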