Spaces:

xiomarablanco
/

plentas

Runtime error

File size: 9,454 Bytes

import gradio as gr
import json
from flask import jsonify
from sentence_transformers import SentenceTransformer, InputExample, util
from codeScripts.utils import save_json, load_json, create_file_path
from plentas import Plentas
import pandas as pd
import zipfile
import os
import shutil
from datetime import datetime
import tablib

def Main(uploadedFile, txtFileInput, orthographyPercentage, syntaxPercentage, semanticPercentage, studentsRange):
    """

    Entry point used by the Gradio interface: grades the students' answers and exports the result to an Excel file.

    Inputs:

        uploadedFile: The uploaded .zip with the students' answers. When falsy, "./assets/test_data.zip" is used.

        txtFileInput: The uploaded .txt with the questions and answers. When falsy, "./assets/qa.txt" is used.

        orthographyPercentage: Value convertible with float(), stored under the "ortographyPercentage" key.

        syntaxPercentage: Value convertible with float(), stored under the "syntaxPercentage" key.

        semanticPercentage: Value convertible with float(), stored under the "semanticPercentage" key.

        studentsRange: The students to evaluate. An empty string is replaced by "All".

    Outputs:

        A list [error, excelPath]. error is "" on success and "Oops: <message>" when any step raised;
        excelPath is the path returned by exportResultToExcelFile, or "" when that step was not completed.

    """
    # Called before the try block: the helper reports its own failures.
    copySpanishDictionaries()

    errorMessage = ""
    resultPath = ""

    try:
        # Fall back to the bundled question/answer file when nothing was uploaded.
        qaPath = txtFileInput.name if txtFileInput else "./assets/qa.txt"
        settings = readQATextFile(qaPath)

        # NOTE(review): the first key is spelled "ortography..." (no "h"); presumably the
        # consumer of these settings expects exactly this spelling - confirm before renaming.
        settings["ortographyPercentage"] = float(orthographyPercentage)
        settings["syntaxPercentage"] = float(syntaxPercentage)
        settings["semanticPercentage"] = float(semanticPercentage)
        settings["students"] = "All" if studentsRange == "" else studentsRange

        # Fall back to the bundled sample answers when nothing was uploaded.
        zipPath = uploadedFile.name if uploadedFile else "./assets/test_data.zip"

        baseConfig = load_json("configV2.json")
        firstConfig = baseConfig[0]

        # Configuring the plentas methodology with the students' answers and the teacher's questions
        studentAnswers = answersTodict(zipPath)
        teacherJson = createTeacherJson(settings)
        grader = Plentas(firstConfig, [studentAnswers, teacherJson])

        # Overwriting the custom settings with the ones collected from the interface
        grader.setApiSettings(settings)

        resultPath = exportResultToExcelFile(grader.processApiData())

    except Exception as e:
        # Any failure is shown to the user through the error output instead of being raised.
        errorMessage = "Oops: " + str(e)

    return [errorMessage, resultPath]

def exportResultToExcelFile(modelResult):
    """

    This function writes the students' results to a new .xlsx file inside './output/'.

    Inputs:

        modelResult: The result of the model. modelResult[0] is iterated as the students array and the
            entry found at position i is indexed with i to obtain that student's row
            (presumably each entry is keyed by its own position - TODO confirm against processApiData).

    Outputs:

        outputFilePath: The path of the written file: './output/<microsecond>_plentas_output.xlsx'.

    """
    excelData = []

    for index, item in enumerate(modelResult[0]):
        print("ITEM - " + str(item))
        excelData.append(item[index])

    tableResults = tablib.Dataset(headers=('ID', 'SimilitudSpacy', 'SimilitudBert', 'NotaSemanticaSpacy', 'NotaSemanticaBert', 'NotaSintaxis', 'NotaOrtografia','NotaTotalSpacy','NotaTotalBert','Feedback'))
    tableResults.json = json.dumps(excelData)
    tableExport = tableResults.export('xlsx')

    outputDir = './output/'
    # Create the folder on demand so that open() below does not fail when it is missing.
    os.makedirs(outputDir, exist_ok=True)

    # The microsecond component of the current time is used to vary the file name between runs.
    outputFilePath = outputDir + str(datetime.now().microsecond) + '_plentas_output.xlsx'

    # The with-statement closes the file; no explicit close() is needed afterwards.
    with open(outputFilePath, 'wb') as f:
        f.write(tableExport)

    return outputFilePath

def copySpanishDictionaries():
    """

    This function copies the bundled Spanish hunspell files (es_ES.aff and es_ES.dic) into the
    dictionaries folder of the installed hunspell package.

    Any Exception raised while copying is printed and not propagated. The files are copied in order,
    so when the first copy fails the second one is not attempted.

    """
    dictionaryCopies = (
        ("./assets/hunspell_dictionaries/es_ES/es_ES.aff", "/home/user/.local/lib/python3.8/site-packages/hunspell/dictionaries/es_ES.aff"),
        ("./assets/hunspell_dictionaries/es_ES/es_ES.dic", "/home/user/.local/lib/python3.8/site-packages/hunspell/dictionaries/es_ES.dic"),
    )

    try:
        for sourcePath, targetPath in dictionaryCopies:
            shutil.copy(sourcePath, targetPath)
    except Exception as ex:
        print("Error copying dictionaries" + str(ex))

def readQATextFile(qaTextFilePath):
    """

    This function reads the questions and answers file and pairs each question with the line that follows it.

    Blank lines found where a question is expected are skipped. The line right after a question is always
    taken as its answer, even if it is blank. A trailing question without an answer line is discarded.
    The stored texts keep their line ending.

    Inputs:

        qaTextFilePath: The path of the text file with one question per line followed by its answer.

    Outputs:

        configuration: A dictionary with the keys "minip<N>" (question) and "minir<N>" (answer), N starting at 1.

    """
    configuration = {}

    # The with-statement guarantees that the file handle is released, also when reading fails.
    with open(qaTextFilePath, 'r') as f:
        lines = f.readlines()

    qCount = 1
    q = ""
    for line in lines:
        if q == "" or q == "\n":
            # Still looking for a question: take this line (a blank one is replaced on the next pass).
            q = line
            continue

        # A question is pending, so this line is its answer.
        configuration["minip" + str(qCount)] = q
        configuration["minir" + str(qCount)] = line
        qCount += 1
        q = ""

    return configuration

def createTeacherJson(configuration):
    """

    This function extracts the information about the subquestions and subanswers and puts them in the correct format.

    Inputs:

        configuration: The configured info from the api. The pairs are read from the keys
            "minip1"/"minir1" up to "minip5"/"minir5".

    Outputs:

        teachersJson: The generated dictionary with the subquestions. Reading stops at the first
            number for which the question or the answer key is missing.

    """
    teachersJson = {"enunciado": "", "minipreguntas":[], "keywords":""}

    #5 is the maximum number of permitted subquestions in the configuration2 page
    for i in range(1, 6):
        try:
            minipregunta = configuration["minip" + str(i)]
            minirespuesta = configuration["minir" + str(i)]
        except KeyError:
            # The pairs are numbered consecutively, so the first missing key marks the end.
            break

        teachersJson["minipreguntas"].append({
            "minipregunta": minipregunta,
            "minirespuesta": minirespuesta
        })

    return teachersJson

def extractZipData(ruta_zip):
    """

    This function extracts the students's answers from the zip file (the one the teacher has in the task section).

    The content is extracted into the path returned by create_file_path("StudentAnswers/", doctype= 1).
    A failure during the extraction is printed and not propagated; a failure while opening the zip file is raised.

    Inputs:

        ruta_zip: The path inherited from answersTodict

    """
    #defining the path where the extracted info is to be stored
    ruta_extraccion = create_file_path("StudentAnswers/", doctype= 1)

    #extracting the info; the with-statement closes the archive in every case
    with zipfile.ZipFile(ruta_zip, "r") as archivo_zip:
        try:
            archivo_zip.extractall(pwd=None, path=ruta_extraccion)
        except Exception as ex:
            # The extraction stays best-effort, but the reason of the failure is no longer hidden.
            print("Error extracting zip file: " + str(ex))
    
def removeHtmlFromString(string):
    """

    This function removes the html tags from the student's response.

    Every character from a "<" up to the next ">" (both included) is dropped. A ">" without a
    previous "<" is dropped too, and an unclosed "<" drops the rest of the text.

    Inputs:

        -string: The student's response

    Outputs:

        -new_string: The filtered response

    """
    def _utf8RoundTrip(text):
        # Encoding with 'replace' turns characters that cannot be encoded into "?".
        return text.encode('utf-8', 'replace').decode('utf-8', 'replace')

    insideTag = False
    keptChars = []

    for symbol in _utf8RoundTrip(string):
        if symbol == "<":
            insideTag = True
        elif symbol == ">":
            insideTag = False
        elif not insideTag:
            keptChars.append(symbol)

    return _utf8RoundTrip("".join(keptChars))

def answersTodict(zip_path):
    """

    This function extracts the students's answers and stacks them in one specific format so that it can be processed next.

    For every student folder, the first line of the file 'Adjuntos del envio/Respuesta enviada' is taken
    as the response and its html tags are removed. When that file cannot be read, or it is empty, the
    student is still included with an empty response. The result is also saved to 'ApiStudentsDict.json'.

    Inputs:

        zip_path: The path where the zip file is stored

    Outputs:

        studentAnswersDict: A list with one dictionary per student, with the keys "respuesta",
            "hashed_id" (the folder name up to the first "(") and "TableIndex" (the position of the
            student inside its work folder).

    """
    #extracting the data
    extractZipData(zip_path)

    studentAnswersDict = []

    #stacking the information of each extracted folder
    for work_folder in os.listdir(create_file_path("StudentAnswers/", doctype= 1)):
        #listing the folder once so that the names and their indexes come from the same listing
        students = os.listdir(create_file_path("StudentAnswers/" + work_folder, doctype= 1))

        for indx, student in enumerate(students):
            student_name = student.split("(")[0]

            try:
                #where the actual response is
                ruta_respuesta = create_file_path("StudentAnswers/" + work_folder + "/" + student + "/" + 'Adjuntos del envio/Respuesta enviada', doctype= 1)

                #reading it; the with-statement closes the file in every case
                with open(ruta_respuesta, encoding='utf-8') as fichero:
                    lineas = fichero.readlines()

                #removing html (only the first line is used as the response)
                respuesta = removeHtmlFromString(lineas[0])

            except Exception:
                #deliberate best-effort: a missing, unreadable or empty file gives an empty response
                respuesta = ""

            #saving it
            studentAnswersDict.append({"respuesta":respuesta, "hashed_id":student_name, "TableIndex":indx})

    #saving the final dictionary
    save_json(create_file_path('ApiStudentsDict.json', doctype= 1),studentAnswersDict)
    return studentAnswersDict


# Gradio user interface.
# NOTE(review): the components come from the gr.inputs / gr.outputs namespaces, which are
# presumably only available in older gradio releases - confirm against the pinned version.

# Input components, created in the same order as the parameters of Main.
zipFileInput = gr.inputs.File(label="1. Selecciona el .ZIP con las respuestas de los alumnos")
txtFileInput = gr.inputs.File(label="2. Selecciona el .txt con las preguntas y respuestas correctas. Escriba una pregunta en una sola línea y debajo la respuesta en la línea siguiente.")
# The three weights are typed in text boxes; Main converts each value with float().
orthographyPercentage = gr.inputs.Textbox(label="Ortografía",lines=1, placeholder="0",default=0.1, numeric=1)
syntaxPercentage = gr.inputs.Textbox(label="Sintaxis",lines=1, placeholder="0",default=0.1,numeric=1)
semanticPercentage = gr.inputs.Textbox(label="Semántica",lines=1, placeholder="0",default=0.8, numeric=1)
# Main replaces an empty value of this field with "All".
studentsRange = gr.inputs.Textbox(label="Estudiantes a evaluar",lines=1, placeholder="Dejar vacío para evaluar todos")
#dataFrameOutput = gr.outputs.Dataframe(headers=["Resultados"], max_rows=20, max_cols=None, overflow_row_behaviour="paginate", type="pandas", label="Resultado")

# Output components. labelOutput is created but it is not passed to gr.Interface below.
labelOutput = gr.outputs.Label(num_top_classes=None, type="auto", label="")
labelError = gr.outputs.Label(num_top_classes=None, type="auto", label="Errores")
downloadExcelButton = gr.outputs.File('Resultados')

# Main returns [error, excelPath]; the two values are paired with labelError and
# downloadExcelButton in that order.
iface = gr.Interface(fn=Main
    , inputs=[zipFileInput, txtFileInput, orthographyPercentage, syntaxPercentage, semanticPercentage, studentsRange]
    , outputs=[labelError, downloadExcelButton]
    , title = "PLENTAS"
)

# The interface is launched at module level (there is no __main__ guard), so it starts
# as soon as this file is executed or imported.
#iface.launch(share = False,enable_queue=True, show_error =True, server_port= 7861)
iface.launch(share = False,enable_queue=True, show_error =True)