Spaces:
Sleeping
Sleeping
import csv
import logging
import os

import numpy
import torch
def check_device(logger=None):
    """Select the torch device to run on and log which one was chosen.

    Args:
        logger: Object exposing ``info(msg)``. When ``None`` (the default),
            a standard ``logging`` logger named after this module is used,
            so the function can be called without arguments.

    Returns:
        ``torch.device("cuda")`` when ``torch.cuda.is_available()`` is true,
        otherwise ``torch.device("cpu")``.
    """
    # The signature advertises logger=None, so the body must tolerate it.
    if logger is None:
        logger = logging.getLogger(__name__)

    if not torch.cuda.is_available():
        logger.info('No GPU available, using the CPU instead.')
        return torch.device("cpu")

    logger.info("There are {} GPU(s) available.".format(torch.cuda.device_count()))
    # Only the name of device index 0 is reported.
    logger.info('We will use the GPU: {}'.format(torch.cuda.get_device_name(0)))
    return torch.device("cuda")
def print_trainable_parameters(model, logger):
    """Log the total and trainable parameter counts of ``model``.

    Args:
        model: Object exposing ``named_parameters()``; each yielded parameter
            must provide ``numel()`` and a ``requires_grad`` attribute.
        logger: Object exposing ``info(msg)``; receives one summary line.

    The summary reports the total count rounded to whole millions, the exact
    total, the trainable count, and the trainable share as a percentage.
    """
    all_param = 0
    trainable_params = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # A model without parameters would otherwise divide by zero here.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0

    logger.info(
        "Total params: {}M ({}) || Trainable params: {} || Trainable: {}%".format(
            round(all_param / 1000000),
            all_param,
            trainable_params,
            trainable_pct,
        )
    )
def save_log(
    loss: float,
    bleu: float,
    edit_distance: float,
    exact_match: float,
    wer: float,
    exprate: float,
    exprate_error_1: float,
    exprate_error_2: float,
    exprate_error_3: float,
    file_name="test_log.csv",
    log_dir="log",
):
    """Append one row of metric values to a CSV log file.

    The file lives at ``<log_dir>/<file_name>``; the directory is created if
    it does not exist. A header row is written only when the file is empty,
    so repeated calls accumulate rows under a single header.

    Args:
        loss, bleu, edit_distance, exact_match, wer, exprate,
        exprate_error_1, exprate_error_2, exprate_error_3: Values written to
            the columns of the same names, in that order.
        file_name: Name of the CSV file inside ``log_dir``.
        log_dir: Directory holding the log file. Defaults to ``"log"``
            (relative to the current working directory), which was the
            previously hard-coded location.
    """
    # Column order of the CSV; the row below is built in the same order.
    fieldnames = [
        "loss",
        "bleu",
        "edit_distance",
        "exact_match",
        "wer",
        "exprate",
        "exprate_error_1",
        "exprate_error_2",
        "exprate_error_3",
    ]
    values = [
        loss,
        bleu,
        edit_distance,
        exact_match,
        wer,
        exprate,
        exprate_error_1,
        exprate_error_2,
        exprate_error_3,
    ]

    os.makedirs(log_dir, exist_ok=True)
    file_path = os.path.join(log_dir, file_name)

    # newline="" lets the csv module control line endings itself.
    with open(file_path, mode="a", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        # Position 0 in append mode means the file is empty: emit the header.
        if csv_file.tell() == 0:
            writer.writeheader()
        writer.writerow(dict(zip(fieldnames, values)))
def cmp_result(label, rec):
    """Compute the edit distance between two sequences.

    Fills a ``(len(label) + 1) x (len(rec) + 1)`` int32 table where cell
    ``[r, c]`` holds the minimum number of substitutions, insertions and
    deletions (each costing 1) needed to turn the first ``r`` items of
    ``label`` into the first ``c`` items of ``rec``.

    Args:
        label: Sequence of comparable items.
        rec: Sequence of comparable items.

    Returns:
        A pair ``(distance, len(label))``; ``distance`` is read out of the
        numpy table.
    """
    n_label = len(label)
    n_rec = len(rec)

    table = numpy.zeros((n_label + 1, n_rec + 1), dtype='int32')
    # Borders: matching against an empty prefix costs one edit per item.
    table[0, :] = range(n_rec + 1)
    table[:, 0] = range(n_label + 1)

    for row, label_item in enumerate(label, start=1):
        for col, rec_item in enumerate(rec, start=1):
            # Diagonal step is free when the items match, 1 otherwise.
            substitute = table[row - 1, col - 1] + (label_item != rec_item)
            insert = table[row, col - 1] + 1
            delete = table[row - 1, col] + 1
            table[row, col] = min(substitute, insert, delete)

    return table[n_label, n_rec], n_label
def compute_exprate(predictions, references):
    """Compute exact-match and near-match rates over paired lines.

    Each prediction/reference pair is split on whitespace and compared with
    ``cmp_result`` (defined in this module) to get a token-level edit
    distance. Lines are then counted by how many edits they need.

    Args:
        predictions: Indexable collection of strings; must have at least as
            many entries as ``references``.
        references: Sequence of strings; its length is the denominator of
            every returned rate.

    Returns:
        A 4-tuple of floats ``(exprate, error_1, error_2, error_3)``: the
        fraction of lines with edit distance exactly 0, at most 1, at most 2
        and at most 3. All four are ``0.0`` when ``references`` is empty.
    """
    total_line = len(references)
    if total_line == 0:
        # Nothing to score; avoid dividing by zero below.
        return 0.0, 0.0, 0.0, 0.0

    # lines_with_edits[k] counts lines needing exactly k edits, k = 0..3.
    lines_with_edits = [0, 0, 0, 0]
    for idx, reference in enumerate(references):
        dist, _ = cmp_result(predictions[idx].split(), reference.split())
        dist = int(dist)
        if dist < len(lines_with_edits):
            lines_with_edits[dist] += 1

    # Each rate is cumulative: "at most k edits" includes all smaller counts.
    rates = []
    lines_so_far = 0
    for count in lines_with_edits:
        lines_so_far += count
        rates.append(float(lines_so_far) / total_line)

    return rates[0], rates[1], rates[2], rates[3]