# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import math

import torch
from torch.nn.modules.loss import _Loss


class LatentLayersKLLoss(_Loss):
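    """KL regularizer for latent layer-selection distributions.

    Penalizes the divergence between a language's layer-selection
    probabilities and a prior, either a uniform prior or the aggregated
    posterior over all languages.
    """
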
    def __init__(self, args):
        super().__init__()
        self.args = args

    def forward(self, layer_samples, lang_idx, update_num, sample_size):
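        """Compute the KL term for the language at `lang_idx`.

        The loss is normalized by the number of layers, weighted by a
        linearly annealed factor capped at `args.sparsity_weight`, and
        scaled by `sample_size`.
        """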
prior = self.args.prior
samples = layer_samples[lang_idx]
eps = 1e-7
if prior == "uniform":
# uniform prior
kl_loss = (samples * (torch.log(samples + eps) - math.log(0.5))).sum(-1)
elif prior == "agged_posterior":
# aggregated posterior
y_t = torch.stack([x.detach() for x in layer_samples], dim=0)
agged_q = torch.sum(y_t, dim=0)
row_norm = agged_q.sum(-1)
normed_agg_q = agged_q / row_norm
kl_loss = (
samples * (torch.log(samples + eps) - torch.log(normed_agg_q + eps))
).sum(-1)
else:
raise NotImplementedError("The specified prior is not implemented.")
# normalized by number of layers
kl_loss /= layer_samples[0].size()[0]
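        # linearly anneal the KL weight after `soft_update` steps, capped at `sparsity_weight`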
kl_weight = min(
self.args.sparsity_weight,
(update_num - self.args.soft_update)
* self.args.sparsity_weight
/ self.args.anneal_updates,
)
kl_loss *= kl_weight * sample_size
return kl_loss


class LatentLayersSparsityLoss(_Loss):
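    """Sparsity regularizer on the layer-selection distributions.

    Combines an entropy-style term that encourages layers to be shared
    across languages with an L2 penalty that pushes the expected number of
    selected layers toward `args.target_layers`.
    """
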
    def __init__(self, args):
        super().__init__()
        self.args = args

    def is_valid(self, update_num):
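        """Return True once a positive layer target is set and annealing has finished."""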
if self.args.target_layers <= 0:
return False
return update_num > (self.args.soft_update + self.args.anneal_updates)

    def forward(self, layer_samples_list, update_num, sample_size):
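        """Compute the combined sharing and sparsity loss over all languages' layer samples."""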
batch_loss = 0
share_loss = 0
global_sparsity_loss = 0
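        # stack per-language samples along a new leading (language) dimension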
layer_samples = torch.stack(layer_samples_list, dim=0)
if (
self.args.target_layers > 0 or self.args.share_weight > 0
) and update_num > (self.args.soft_update + self.args.anneal_updates):
# anneal sparsity weight
if update_num < (self.args.anneal_updates + self.args.soft_update):
weight_anneal = 0
elif update_num < (2 * self.args.anneal_updates + self.args.soft_update):
weight_anneal = (
(update_num - self.args.soft_update - self.args.anneal_updates)
* self.args.share_weight
/ self.args.anneal_updates
)
else:
weight_anneal = 1
            # compute per-layer utilization, averaged across languages
            layer_utilization = torch.sum(layer_samples, dim=0)
            layer_utilization /= layer_samples.size()[0]
if self.args.share_weight > 0:
# encouraging sharing across languages
share_loss = sum(
-1.0 * v * math.log(v) for v in layer_utilization if v > 0
)
batch_loss += (
weight_anneal * self.args.share_weight * sample_size * share_loss
)
            if self.args.target_layers > 0:
                # compute the expected number of selected layers
                expected_layers = sum(layer_utilization)
                # compute L2 loss w.r.t. the target number of layers
                global_sparsity_loss = (expected_layers - self.args.target_layers) ** 2
batch_loss += (
weight_anneal
* self.args.share_weight
* sample_size
* global_sparsity_loss
)
return batch_loss
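

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module: both losses only read
    # hyperparameters from `args`, so a SimpleNamespace with illustrative values
    # (these are NOT fairseq defaults) is enough to exercise them standalone.
    from types import SimpleNamespace

    args = SimpleNamespace(
        prior="uniform",
        sparsity_weight=0.1,
        share_weight=0.1,
        target_layers=6,
        soft_update=100,
        anneal_updates=1000,
    )
    kl_loss_fn = LatentLayersKLLoss(args)
    sparsity_loss_fn = LatentLayersSparsityLoss(args)

    num_langs, num_layers = 4, 12
    # stand-ins for the model's per-language layer-selection probabilities
    layer_samples = [torch.rand(num_layers) for _ in range(num_langs)]

    kl = kl_loss_fn(layer_samples, lang_idx=0, update_num=2000, sample_size=1)
    sparsity = sparsity_loss_fn(layer_samples, update_num=2500, sample_size=1)
    print(f"KL loss: {float(kl):.4f}, sparsity loss: {float(sparsity):.4f}")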