Spaces:

hoang-quoc-trung
/

sumen

Sleeping

File size: 3,909 Bytes

3d52ce7

import os
import csv
import torch
import numpy


def check_device(logger=None):
    """
    Select the torch device to run on, preferring CUDA when available.

    Args:
        logger: Optional object with an ``info(message)`` method. When it is
            ``None`` (the default) the device is selected silently.

    Returns:
        ``torch.device("cuda")`` if ``torch.cuda.is_available()`` is true,
        otherwise ``torch.device("cpu")``.
    """
    def _info(message):
        # The logger is optional; calling ``.info`` on None would raise
        # AttributeError, so logging is skipped when none is supplied.
        if logger is not None:
            logger.info(message)

    if torch.cuda.is_available():
        device = torch.device("cuda")
        _info("There are {} GPU(s) available.".format(torch.cuda.device_count()))
        _info('We will use the GPU: {}'.format(torch.cuda.get_device_name(0)))
    else:
        _info('No GPU available, using the CPU instead.')
        device = torch.device("cpu")
    return device


def print_trainable_parameters(model, logger):
    """
    Log the total and trainable parameter counts of a model.

    Args:
        model: Object whose ``named_parameters()`` yields ``(name, param)``
            pairs; each ``param`` must provide ``numel()`` and
            ``requires_grad``.
        logger: Object with an ``info(message)`` method that receives the
            formatted summary line.

    The summary reports the total count rounded to whole millions, the exact
    total, the trainable count, and the trainable share as a percentage.
    """
    all_param = 0
    trainable_params = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # A model without any parameters would otherwise divide by zero.
    if all_param:
        trainable_percent = 100 * trainable_params / all_param
    else:
        trainable_percent = 0.0

    logger.info(
        "Total params: {}M ({}) || Trainable params: {} || Trainable: {}%".format(
            round(all_param / 1000000),
            all_param,
            trainable_params,
            trainable_percent,
        )
    )


def save_log(
    loss: float,
    bleu: float,
    edit_distance: float,
    exact_match: float,
    wer: float,
    exprate: float,
    exprate_error_1: float,
    exprate_error_2: float,
    exprate_error_3: float,
    file_name="test_log.csv",
):
    """
    Append one row of evaluation metrics to a CSV file under ``log/``.

    The ``log`` directory is created if it does not exist. The file is opened
    in append mode; a header row is written first only when the write
    position is at offset 0 (i.e. the file is empty).

    Args:
        loss, bleu, edit_distance, exact_match, wer, exprate,
        exprate_error_1, exprate_error_2, exprate_error_3: Metric values
            written under the column of the same name.
        file_name: Name of the CSV file inside the ``log`` directory.
    """
    # Insertion order of this dict defines the CSV column order.
    record = {
        "loss": loss,
        "bleu": bleu,
        "edit_distance": edit_distance,
        "exact_match": exact_match,
        "wer": wer,
        "exprate": exprate,
        "exprate_error_1": exprate_error_1,
        "exprate_error_2": exprate_error_2,
        "exprate_error_3": exprate_error_3,
    }

    log_dir = 'log'
    os.makedirs(log_dir, exist_ok=True)
    target = os.path.join(log_dir, file_name)

    with open(target, mode="a", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=list(record))
        file_is_empty = handle.tell() == 0
        if file_is_empty:
            writer.writeheader()
        writer.writerow(record)


def cmp_result(label,rec):
    """
    Compute the edit distance between two sequences.

    Fills a ``(len(label) + 1) x (len(rec) + 1)`` int32 table where each cell
    holds the minimum number of single-item substitutions, insertions and
    deletions (each costing 1) needed to turn a prefix of ``label`` into a
    prefix of ``rec``.

    Args:
        label: Sequence of comparable items.
        rec: Sequence of comparable items.

    Returns:
        A tuple ``(distance, len(label))`` where ``distance`` is the
        bottom-right cell of the table.
    """
    n_label = len(label)
    n_rec = len(rec)

    table = numpy.zeros((n_label + 1, n_rec + 1), dtype='int32')
    # Converting to/from an empty prefix costs one edit per item.
    table[0, :] = range(n_rec + 1)
    table[:, 0] = range(n_label + 1)

    for row, label_item in enumerate(label, start=1):
        for col, rec_item in enumerate(rec, start=1):
            # The mismatch flag adds 1 only when the two items differ.
            substitute = table[row - 1, col - 1] + (label_item != rec_item)
            insert = table[row, col - 1] + 1
            delete = table[row - 1, col] + 1
            table[row, col] = min(substitute, insert, delete)

    return table[n_label, n_rec], n_label


def compute_exprate(predictions, references):
    """
    Compute expression-level recognition rates at 0 to 3 token errors.

    Each prediction/reference pair is split on whitespace and compared with
    ``cmp_result``; the resulting edit distance decides which tolerance
    buckets the pair counts towards.

    Args:
        predictions: Sequence of predicted strings, indexable by position.
            It must hold at least ``len(references)`` items.
        references: Sequence of reference strings; its length is the
            denominator of every rate.

    Returns:
        A tuple ``(exprate, error_1, error_2, error_3)`` of floats: the
        fraction of pairs whose edit distance is 0, at most 1, at most 2 and
        at most 3 respectively. All four are ``0.0`` when ``references`` is
        empty.
    """
    total_line = len(references)
    if total_line == 0:
        # Nothing to score; avoid dividing by zero below.
        return 0.0, 0.0, 0.0, 0.0

    # exact_counts[k] is the number of pairs whose distance is exactly k.
    exact_counts = [0, 0, 0, 0]
    for i, reference in enumerate(references):
        pre = predictions[i].split()
        ref = reference.split()
        dist, _ = cmp_result(pre, ref)
        if dist <= 3:
            exact_counts[dist] += 1

    # Each rate is cumulative: "at most k errors" includes all smaller k.
    rates = []
    within = 0
    for count in exact_counts:
        within += count
        rates.append(float(within) / total_line)

    exprate, error_1, error_2, error_3 = rates
    return exprate, error_1, error_2, error_3