# NOTE(review): removed scraped-page artifacts that preceded this file
# ("Spaces:", "Runtime error" banners, file-size line, commit hash, and a
# dump of display line numbers) — they were viewer chrome, not source code.
import pandas as pd
import json
from codeScripts.Dependencies.SentenceTransformer2 import *
from codeScripts.utils import load_json, create_file_path
class GetSettings():
    """
    Stores the selected settings for the current experiment.

    On construction it (1) imports all configuration values (paths, rubric
    weights, thresholds, model parameters) from the config json and
    (2) loads the exam data to study, either from the path named in the
    config or from the file uploaded through the api.
    """

    def __init__(self, config_settings, studentsData):
        # extracting the settings from the configuration document
        self.__getConfigSettings(config_settings)
        # getting the responses to study
        self.__getDatatoStudy(studentsData)

    def __getDatatoStudy(self, data):
        """
        Select the exam-data source.

        Inputs:
            -data: sequence whose first element is None when no api file was
             provided (fall back to the path in the config json); otherwise
             it holds the api-provided exam data.
        """
        if data[0] is None:
            # extracting the info from the path in the config json
            self.__getData(self.json_file_in)
        else:
            # extracting the info from the selected file in the api
            self.__getApiData(data)

    def setApiSettings(self, api_settings):
        """
        Overwrite the rubric weights and student range with the values
        selected from the api.

        Inputs:
            -api_settings: json-encoded string with the stored parameters
             from the api.
        """
        # transforming string dict into dict
        api_settings = json.loads(api_settings)

        self.PesoOrtografia = api_settings["ortographyPercentage"]
        self.PesoSintaxis = api_settings["syntaxPercentage"]
        self.PesoSemantics = api_settings["semanticPercentage"]
        self.rango_ID = api_settings["students"]

    def __getConfigSettings(self, df):
        """
        Import the settings from the config json.

        Inputs:
            -df: the dict-like object where the config json data is loaded.
        """
        # +++ General settings +++
        # path where the dataset is stored
        self.json_file_in = df["ruta_fichero_entrada"]
        # path where output is to be stored
        self.json_file_out = df["ruta_fichero_salida"]
        # path to access hunspell components
        self.hunspell_aff = df["ruta_hunspell"]["aff"]
        self.hunspell_dic = df["ruta_hunspell"]["dic"]

        # range of students to study ---- will be overwritten from api
        if df["Parametros_Analisis"]["estudiantes"]["Todos"]:
            self.rango_ID = "All"
        else:
            self.rango_ID = df["Parametros_Analisis"]["estudiantes"]["ID_rango"]

        # sentence-grouping window: [Minimo, Maximo] inclusive, hence the +1
        self.minAgrupation = int(df["Parametros_Analisis"]["Semantica"]["frases"]["Agrupacion"]["Minimo"])
        self.maxAgrupation = int(df["Parametros_Analisis"]["Semantica"]["frases"]["Agrupacion"]["Maximo"] + 1)

        # +++ Ortography +++
        # if the ortographic level is activated
        self.Ortografia = df["Parametros_Analisis"]["Ortografia"]["Activado"]
        # max number of permitted errors
        self.NMaxErrores = df["Parametros_Rubrica"]["Ortografia"]["NMaxErrores"]
        # max number of permitted errors before beginning to substract
        self.FaltasSalvaguarda = df["Parametros_Rubrica"]["Ortografia"]["FaltasSalvaguarda"]
        # level weight (rubrics)
        self.PesoOrtografia = df["Parametros_Rubrica"]["Ortografia"]["Peso"]

        # +++ Syntax +++
        # if the syntactic level is activated
        self.Sintaxis = df["Parametros_Analisis"]["Sintaxis"]["Activado"]
        # max number of sentences and words permitted
        self.NMaxFrases = df["Parametros_Rubrica"]["Sintaxis"]["NMaxFrases"]
        self.NMaxPalabras = df["Parametros_Rubrica"]["Sintaxis"]["NMaxPalabras"]
        # weight of the level
        self.PesoSintaxis = df["Parametros_Rubrica"]["Sintaxis"]["Peso"]

        # +++ Semantics +++
        # if the semantic level is activated
        self.Semantica = df["Parametros_Analisis"]["Semantica"]["Activado"]
        # weight of the level
        self.PesoSemantics = df["Parametros_Rubrica"]["Semantica"]["Peso"]

        # --- Similarity ---
        SpacyPackage = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Spacy"]["Package"]
        self.spacy_package = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Spacy"][SpacyPackage]
        print("spacy_package", self.spacy_package)

        # the minimun value to select one line of response as similar
        # (0.615 sm - 0.875 md and lg)
        self.LofRespThreshold = df["Parametros_Rubrica"]["Semantica"]["LineaRespuesta"]["ThresholdToConsiderCeroValue"][SpacyPackage]
        print("lofThreshold", self.LofRespThreshold)

        # the different thresholds (min-max) to adapt the similarity score
        self.UmbralesSimilitud = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["UmbralesSimilitud"][SpacyPackage]
        print("self.UmbralesSimilitud", self.UmbralesSimilitud)

        # to configure only once the bert model parameters
        model_name = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Bert"]["model_path"]
        self.model_path = create_file_path('', doctype=4) + model_name
        print("self.model_path", self.model_path)
        self.modelr = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Bert"]["model"]
        print("self.modelr", self.modelr)
        self.epochr = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Bert"]["epoch"]
        print("self.epochr", self.epochr)
        self.BertModels_glbl = SentTransf_test([self.modelr], [self.epochr])

        # variables to store some values during the analysis
        self.studentID = ""
        self.faltasOrto = 0
        self.palabrasPorFrase = 0
        self.minipreguntasMalSpacy = ""
        self.minipreguntasMalBert = ""

    def __extractExamInfo(self, metadata):
        """
        Shared extraction of the exam structure (sub-questions, sub-answers,
        statement, keywords) from a metadata dict, used by both data sources.

        Also dumps the teacher's [sub-question, sub-answer] pairs to
        MinirespuestasProfesor.json.

        Inputs:
            -metadata: dict with 'enunciado', 'keywords' and (optionally)
             a list under 'minipreguntas'.
        """
        self.minipreguntas = []
        self.minirespuestas = []
        self.indice_minipreguntas = []
        self.respuesta_prof = ""
        self.enunciado = metadata['enunciado']
        self.prof_keywords = metadata['keywords']

        # Iterate the sub-question list directly instead of the old
        # while-True / bare-except pattern; the targeted except keeps the
        # original best-effort behavior (partial data is kept on failure)
        # without hiding unrelated bugs.
        try:
            for i, minipregunta in enumerate(metadata['minipreguntas']):
                self.minirespuestas.append(minipregunta['minirespuesta'])
                self.minipreguntas.append(minipregunta['minipregunta'])
                self.indice_minipreguntas.append("minipregunta" + str(i))
        except (KeyError, IndexError, TypeError):
            pass

        # same result as the old incremental concatenation
        self.respuesta_prof = ' '.join(self.minirespuestas)

        info_profesor = [[minipregunta, minirespuesta]
                         for minipregunta, minirespuesta
                         in zip(self.minipreguntas, self.minirespuestas)]
        # NOTE(review): save_json is not imported explicitly in this file;
        # presumably it comes from the SentenceTransformer2 star import —
        # verify, or add it to the codeScripts.utils import.
        save_json(create_file_path("MinirespuestasProfesor.json", 2), info_profesor)

    def __getApiData(self, json_file):
        """
        Extract the exam data and format from the api payload
        (sub-questions, sub-answers, etc).

        Inputs:
            -json_file: sequence whose first element is the answers table
             and whose second element is the exam metadata dict.
        """
        self.answersDF = pd.DataFrame(json_file[0])
        self.id_number = 0
        self.__extractExamInfo(json_file[1])

    def __getData(self, json_file):
        """
        Extract the exam data and format from the path that appears in the
        config json (sub-questions, sub-answers, etc).

        Inputs:
            -json_file: path to the json dataset on disk.
        """
        self.answersDF = pd.DataFrame(load_json(json_file))
        self.id_number = 0
        self.__extractExamInfo(self.answersDF['metadata'][0])