Spaces:
Sleeping
Sleeping
File size: 3,909 Bytes
3d52ce7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import os
import csv
import torch
import numpy
def check_device(logger=None):
    """
    Select the torch device to run on, preferring CUDA when it is available.

    Args:
        logger: Optional object exposing ``info(msg)``. When ``None`` (the
            default) the device is selected silently instead of failing on
            ``None.info``.

    Returns:
        ``torch.device("cuda")`` when ``torch.cuda.is_available()`` is true,
        otherwise ``torch.device("cpu")``.
    """
    def _report(message):
        # The signature advertises logger=None, so a missing logger must not crash.
        if logger is not None:
            logger.info(message)

    if torch.cuda.is_available():
        device = torch.device("cuda")
        _report("There are {} GPU(s) available.".format(torch.cuda.device_count()))
        _report('We will use the GPU: {}'.format(torch.cuda.get_device_name(0)))
    else:
        _report('No GPU available, using the CPU instead.')
        device = torch.device("cpu")
    return device
def print_trainable_parameters(model, logger):
    """
    Logs the total and trainable parameter counts of the model.

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs; each ``param`` must provide ``numel()``
            and a ``requires_grad`` attribute.
        logger: Object exposing ``info(msg)``; receives one summary line.

    A model without any parameters is reported as 0.0% trainable rather than
    raising ``ZeroDivisionError``.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the ratio: a parameter-free model would otherwise divide by zero.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    logger.info(
        "Total params: {}M ({}) || Trainable params: {} || Trainable: {}%".format(
            round(all_param / 1000000),
            all_param,
            trainable_params,
            trainable_pct,
        )
    )
def save_log(
    loss: float,
    bleu: float,
    edit_distance: float,
    exact_match: float,
    wer: float,
    exprate: float,
    exprate_error_1: float,
    exprate_error_2: float,
    exprate_error_3: float,
    file_name="test_log.csv",
):
    """
    Append one row of evaluation metrics to ``log/<file_name>``.

    The ``log`` directory is created when missing. The CSV header row is
    written only when the file position is 0 right after opening the file
    in append mode.
    """
    # Insertion order of this dict defines the CSV column order.
    record = {
        "loss": loss,
        "bleu": bleu,
        "edit_distance": edit_distance,
        "exact_match": exact_match,
        "wer": wer,
        "exprate": exprate,
        "exprate_error_1": exprate_error_1,
        "exprate_error_2": exprate_error_2,
        "exprate_error_3": exprate_error_3,
    }
    log_dir = 'log'
    os.makedirs(log_dir, exist_ok=True)
    target = os.path.join(log_dir, file_name)
    with open(target, mode="a", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=list(record))
        # Offset 0 on a freshly opened append handle is taken to mean the
        # file has no content yet, so the header is still missing.
        if handle.tell() == 0:
            writer.writeheader()
        writer.writerow(record)
def cmp_result(label, rec):
    """
    Compute the edit distance between two sequences.

    Fills an int32 table of shape ``(len(label) + 1, len(rec) + 1)`` where
    each cell is the minimum of a substitution (cost 0 when the two items
    are equal, 1 otherwise), an insertion (cost 1) and a deletion (cost 1).

    Returns:
        ``(dist, len(label))`` where ``dist`` is the bottom-right table cell.
    """
    n_label = len(label)
    n_rec = len(rec)
    table = numpy.zeros((n_label + 1, n_rec + 1), dtype='int32')
    # Border cells: turning an empty prefix into a prefix of length k costs k.
    table[0, :] = numpy.arange(n_rec + 1)
    table[:, 0] = numpy.arange(n_label + 1)
    for row, label_item in enumerate(label, start=1):
        for col, rec_item in enumerate(rec, start=1):
            substitute = table[row - 1, col - 1] + (label_item != rec_item)
            insert = table[row, col - 1] + 1
            delete = table[row - 1, col] + 1
            table[row, col] = min(substitute, insert, delete)
    return table[n_label, n_rec], n_label
def compute_exprate(predictions, references):
    """
    Compute the expression rate and its at-most-1/2/3-error variants.

    Each reference and the prediction at the same index are split on
    whitespace and compared with ``cmp_result``; the resulting distance
    decides which tolerance buckets the line counts towards.

    Args:
        predictions: Indexable collection of strings; ``predictions[i]`` is
            compared with the i-th reference.
        references: Iterable of strings; its length is the number of lines
            the rates are normalised by.

    Returns:
        Tuple ``(exprate, error_1, error_2, error_3)`` of floats: the
        fraction of lines whose distance is 0, at most 1, at most 2 and at
        most 3 respectively. All four are 0.0 when ``references`` is empty
        (instead of raising ``ZeroDivisionError``).
    """
    max_errors = 3
    # within[k] counts the lines whose distance is at most k, so the
    # cumulative rates fall out directly without summing separate buckets.
    within = [0] * (max_errors + 1)
    total_line = 0
    for i, reference in enumerate(references):
        dist, _ = cmp_result(predictions[i].split(), reference.split())
        total_line += 1
        for k in range(max_errors + 1):
            if dist <= k:
                within[k] += 1
    if total_line == 0:
        # Nothing was evaluated: report zero rates rather than dividing by zero.
        return 0.0, 0.0, 0.0, 0.0
    exprate, error_1, error_2, error_3 = (
        float(count) / total_line for count in within
    )
    return exprate, error_1, error_2, error_3