# Copyright 2018 The TensorFlow Global Objectives Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains utility functions for the global objectives library."""
# Dependency imports
import tensorflow as tf


def weighted_sigmoid_cross_entropy_with_logits(labels,
                                               logits,
                                               positive_weights=1.0,
                                               negative_weights=1.0,
                                               name=None):
"""Computes a weighting of sigmoid cross entropy given `logits`.
Measures the weighted probability error in discrete classification tasks in
which classes are independent and not mutually exclusive. For instance, one
could perform multilabel classification where a picture can contain both an
elephant and a dog at the same time. The class weight multiplies the
different types of errors.
For brevity, let `x = logits`, `z = labels`, `c = positive_weights`,
`d = negative_weights` The
weighed logistic loss is
```
c * z * -log(sigmoid(x)) + d * (1 - z) * -log(1 - sigmoid(x))
= c * z * -log(1 / (1 + exp(-x))) - d * (1 - z) * log(exp(-x) / (1 + exp(-x)))
= c * z * log(1 + exp(-x)) + d * (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
= c * z * log(1 + exp(-x)) + d * (1 - z) * (x + log(1 + exp(-x)))
= (1 - z) * x * d + (1 - z + c * z ) * log(1 + exp(-x))
= - d * x * z + d * x + (d - d * z + c * z ) * log(1 + exp(-x))
```
To ensure stability and avoid overflow, the implementation uses the identity
log(1 + exp(-x)) = max(0,-x) + log(1 + exp(-abs(x)))
and the result is computed as
```
= -d * x * z + d * x
+ (d - d * z + c * z ) * (max(0,-x) + log(1 + exp(-abs(x))))
```
Note that the loss is NOT an upper bound on the 0-1 loss, unless it is divided
by log(2).
Args:
labels: A `Tensor` of type `float32` or `float64`. `labels` can be a 2D
tensor with shape [batch_size, num_labels] or a 3D tensor with shape
[batch_size, num_labels, K].
logits: A `Tensor` of the same type and shape as `labels`. If `logits` has
shape [batch_size, num_labels, K], the loss is computed separately on each
slice [:, :, k] of `logits`.
positive_weights: A `Tensor` that holds positive weights and has the
following semantics according to its shape:
scalar - A global positive weight.
1D tensor - must be of size K, a weight for each 'attempt'
2D tensor - of size [num_labels, K'] where K' is either K or 1.
The `positive_weights` will be expanded to the left to match the
dimensions of logits and labels.
negative_weights: A `Tensor` that holds positive weight and has the
semantics identical to positive_weights.
name: A name for the operation (optional).
Returns:
A `Tensor` of the same shape as `logits` with the componentwise
weighted logistic losses.
"""
  with tf.name_scope(
      name,
      'weighted_logistic_loss',
      [logits, labels, positive_weights, negative_weights]) as name:
    labels, logits, positive_weights, negative_weights = prepare_loss_args(
        labels, logits, positive_weights, negative_weights)

    softplus_term = tf.add(tf.maximum(-logits, 0.0),
                           tf.log(1.0 + tf.exp(-tf.abs(logits))))
    weight_dependent_factor = (
        negative_weights + (positive_weights - negative_weights) * labels)
    return (negative_weights * (logits - labels * logits) +
            weight_dependent_factor * softplus_term)
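

# Illustrative usage sketch, not part of the original library: a TF1-style
# example (matching the tf.contrib usage elsewhere in this file) showing the
# loss on hand-picked values. The constants and the helper name
# `_example_weighted_sigmoid_loss` are ours.
def _example_weighted_sigmoid_loss():
  labels = tf.constant([[1.0, 0.0], [0.0, 1.0]])
  logits = tf.constant([[2.0, -1.0], [0.5, 0.3]])
  # Errors on positive labels are weighted twice as heavily as errors on
  # negative labels.
  loss = weighted_sigmoid_cross_entropy_with_logits(
      labels, logits, positive_weights=2.0, negative_weights=1.0)
  with tf.Session() as sess:
    return sess.run(loss)  # Elementwise losses with shape [2, 2].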


def weighted_hinge_loss(labels,
                        logits,
                        positive_weights=1.0,
                        negative_weights=1.0,
                        name=None):
"""Computes weighted hinge loss given logits `logits`.
The loss applies to multi-label classification tasks where labels are
independent and not mutually exclusive. See also
`weighted_sigmoid_cross_entropy_with_logits`.
Args:
labels: A `Tensor` of type `float32` or `float64`. Each entry must be
either 0 or 1. `labels` can be a 2D tensor with shape
[batch_size, num_labels] or a 3D tensor with shape
[batch_size, num_labels, K].
logits: A `Tensor` of the same type and shape as `labels`. If `logits` has
shape [batch_size, num_labels, K], the loss is computed separately on each
slice [:, :, k] of `logits`.
positive_weights: A `Tensor` that holds positive weights and has the
following semantics according to its shape:
scalar - A global positive weight.
1D tensor - must be of size K, a weight for each 'attempt'
2D tensor - of size [num_labels, K'] where K' is either K or 1.
The `positive_weights` will be expanded to the left to match the
dimensions of logits and labels.
negative_weights: A `Tensor` that holds positive weight and has the
semantics identical to positive_weights.
name: A name for the operation (optional).
Returns:
A `Tensor` of the same shape as `logits` with the componentwise
weighted hinge loss.
"""
  with tf.name_scope(
      name, 'weighted_hinge_loss',
      [logits, labels, positive_weights, negative_weights]) as name:
    labels, logits, positive_weights, negative_weights = prepare_loss_args(
        labels, logits, positive_weights, negative_weights)

    positives_term = positive_weights * labels * tf.maximum(1.0 - logits, 0)
    negatives_term = (negative_weights * (1.0 - labels)
                      * tf.maximum(1.0 + logits, 0))
    return positives_term + negatives_term
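

# Illustrative usage sketch, not part of the original library, with
# hand-checkable values: with positive_weights=3.0, a positive example with
# logit 0.2 incurs 3 * max(1 - 0.2, 0) = 2.4, while the negative example with
# logit -2.0 lies outside the margin and incurs 0.
def _example_weighted_hinge_loss():
  labels = tf.constant([[1.0, 0.0]])
  logits = tf.constant([[0.2, -2.0]])
  loss = weighted_hinge_loss(labels, logits, positive_weights=3.0)
  with tf.Session() as sess:
    return sess.run(loss)  # [[2.4, 0.]]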


def weighted_surrogate_loss(labels,
                            logits,
                            surrogate_type='xent',
                            positive_weights=1.0,
                            negative_weights=1.0,
                            name=None):
"""Returns either weighted cross-entropy or hinge loss.
For example `surrogate_type` is 'xent' returns the weighted cross
entropy loss.
Args:
labels: A `Tensor` of type `float32` or `float64`. Each entry must be
between 0 and 1. `labels` can be a 2D tensor with shape
[batch_size, num_labels] or a 3D tensor with shape
[batch_size, num_labels, K].
logits: A `Tensor` of the same type and shape as `labels`. If `logits` has
shape [batch_size, num_labels, K], each slice [:, :, k] represents an
'attempt' to predict `labels` and the loss is computed per slice.
surrogate_type: A string that determines which loss to return, supports
'xent' for cross-entropy and 'hinge' for hinge loss.
positive_weights: A `Tensor` that holds positive weights and has the
following semantics according to its shape:
scalar - A global positive weight.
1D tensor - must be of size K, a weight for each 'attempt'
2D tensor - of size [num_labels, K'] where K' is either K or 1.
The `positive_weights` will be expanded to the left to match the
dimensions of logits and labels.
negative_weights: A `Tensor` that holds positive weight and has the
semantics identical to positive_weights.
name: A name for the operation (optional).
Returns:
The weigthed loss.
Raises:
ValueError: If value of `surrogate_type` is not supported.
"""
  with tf.name_scope(
      name, 'weighted_loss',
      [logits, labels, surrogate_type, positive_weights,
       negative_weights]) as name:
    if surrogate_type == 'xent':
      return weighted_sigmoid_cross_entropy_with_logits(
          logits=logits,
          labels=labels,
          positive_weights=positive_weights,
          negative_weights=negative_weights,
          name=name)
    elif surrogate_type == 'hinge':
      return weighted_hinge_loss(
          logits=logits,
          labels=labels,
          positive_weights=positive_weights,
          negative_weights=negative_weights,
          name=name)
  raise ValueError('surrogate_type %s not supported.' % surrogate_type)
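

# Illustrative usage sketch, not part of the original library: the dispatcher
# with both supported surrogate types on the same inputs. Any other
# `surrogate_type` string raises a ValueError.
def _example_weighted_surrogate_loss():
  labels = tf.constant([[1.0, 0.0]])
  logits = tf.constant([[0.5, -0.5]])
  xent = weighted_surrogate_loss(labels, logits, surrogate_type='xent')
  hinge = weighted_surrogate_loss(labels, logits, surrogate_type='hinge')
  with tf.Session() as sess:
    return sess.run([xent, hinge])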


def expand_outer(tensor, rank):
  """Expands the given `Tensor` outwards to a target rank.

  For example, if rank = 3 and tensor.shape is [3, 4], this function will
  expand `tensor` so that the resulting shape is [1, 3, 4].

  Args:
    tensor: The tensor to expand.
    rank: The target rank.

  Returns:
    The expanded tensor.

  Raises:
    ValueError: If rank of `tensor` is unknown, or if `rank` is smaller than
      the rank of `tensor`.
  """
  if tensor.get_shape().ndims is None:
    raise ValueError('tensor dimension must be known.')
  if len(tensor.get_shape()) > rank:
    raise ValueError(
        '`rank` must be at least the current tensor dimension: (%s vs %s).' %
        (rank, len(tensor.get_shape())))
  while len(tensor.get_shape()) < rank:
    tensor = tf.expand_dims(tensor, 0)
  return tensor
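

# Illustrative usage sketch, not part of the original library: expanding a
# rank-2 tensor to rank 3 prepends unit dimensions, which is how scalar or
# per-label weights get broadcast against 3D logits in prepare_loss_args
# below. No session is needed since only static shapes are inspected.
def _example_expand_outer():
  tensor = tf.ones([3, 4])
  expanded = expand_outer(tensor, 3)
  return expanded.get_shape().as_list()  # [1, 3, 4]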


def build_label_priors(labels,
                       weights=None,
                       positive_pseudocount=1.0,
                       negative_pseudocount=1.0,
                       variables_collections=None):
"""Creates an op to maintain and update label prior probabilities.
For each label, the label priors are estimated as
(P + sum_i w_i y_i) / (P + N + sum_i w_i),
where y_i is the ith label, w_i is the ith weight, P is a pseudo-count of
positive labels, and N is a pseudo-count of negative labels. The index i
ranges over all labels observed during all evaluations of the returned op.
Args:
labels: A `Tensor` with shape [batch_size, num_labels]. Entries should be
in [0, 1].
weights: Coefficients representing the weight of each label. Must be either
a Tensor of shape [batch_size, num_labels] or `None`, in which case each
weight is treated as 1.0.
positive_pseudocount: Number of positive labels used to initialize the label
priors.
negative_pseudocount: Number of negative labels used to initialize the label
priors.
variables_collections: Optional list of collections for created variables.
Returns:
label_priors: An op to update the weighted label_priors. Gives the
current value of the label priors when evaluated.
"""
  dtype = labels.dtype.base_dtype
  num_labels = get_num_labels(labels)

  if weights is None:
    weights = tf.ones_like(labels)

  # We disable partitioning while constructing dual variables because they will
  # be updated with assign, which is not available for partitioned variables.
  partitioner = tf.get_variable_scope().partitioner
  try:
    tf.get_variable_scope().set_partitioner(None)
    # Create variable and update op for weighted label counts.
    weighted_label_counts = tf.contrib.framework.model_variable(
        name='weighted_label_counts',
        shape=[num_labels],
        dtype=dtype,
        initializer=tf.constant_initializer(
            [positive_pseudocount] * num_labels, dtype=dtype),
        collections=variables_collections,
        trainable=False)
    weighted_label_counts_update = weighted_label_counts.assign_add(
        tf.reduce_sum(weights * labels, 0))

    # Create variable and update op for the sum of the weights.
    weight_sum = tf.contrib.framework.model_variable(
        name='weight_sum',
        shape=[num_labels],
        dtype=dtype,
        initializer=tf.constant_initializer(
            [positive_pseudocount + negative_pseudocount] * num_labels,
            dtype=dtype),
        collections=variables_collections,
        trainable=False)
    weight_sum_update = weight_sum.assign_add(tf.reduce_sum(weights, 0))
  finally:
    tf.get_variable_scope().set_partitioner(partitioner)

  label_priors = tf.div(
      weighted_label_counts_update,
      weight_sum_update)
  return label_priors
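

# Illustrative usage sketch, not part of the original library: with the
# default pseudocounts P = N = 1.0, each prior starts at 0.5 and moves toward
# the empirical positive rate as batches are fed. After one batch of two
# examples that are both positive for label 0 only, the counts become [3, 1]
# and the weight sums [4, 4], so the priors evaluate to [0.75, 0.25].
def _example_build_label_priors():
  labels = tf.placeholder(tf.float32, shape=[None, 2])
  label_priors = build_label_priors(labels)
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = [[1.0, 0.0], [1.0, 0.0]]
    return sess.run(label_priors, feed_dict={labels: batch})  # [0.75, 0.25]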


def convert_and_cast(value, name, dtype):
  """Converts input to tensor and casts it to dtype.

  Args:
    value: An object whose type has a registered Tensor conversion function,
      e.g. python numerical type or numpy array.
    name: Name to use for the new Tensor, if one is created.
    dtype: Optional element type for the returned tensor.

  Returns:
    A tensor.
  """
  return tf.cast(tf.convert_to_tensor(value, name=name), dtype=dtype)


def prepare_loss_args(labels, logits, positive_weights, negative_weights):
  """Prepares arguments for the weighted loss functions.

  If needed, converts the given arguments to the appropriate type and shape.

  Args:
    labels: Labels of the loss function.
    logits: Logits of the loss function.
    positive_weights: Weight on the positive examples.
    negative_weights: Weight on the negative examples.

  Returns:
    Converted labels, logits, positive_weights, negative_weights.
  """
  logits = tf.convert_to_tensor(logits, name='logits')
  labels = convert_and_cast(labels, 'labels', logits.dtype)
  if len(labels.get_shape()) == 2 and len(logits.get_shape()) == 3:
    labels = tf.expand_dims(labels, [2])

  positive_weights = convert_and_cast(positive_weights, 'positive_weights',
                                      logits.dtype)
  positive_weights = expand_outer(positive_weights, logits.get_shape().ndims)
  negative_weights = convert_and_cast(negative_weights, 'negative_weights',
                                      logits.dtype)
  negative_weights = expand_outer(negative_weights, logits.get_shape().ndims)
  return labels, logits, positive_weights, negative_weights
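

# Illustrative usage sketch, not part of the original library: with 2D labels
# and 3D logits, labels gain a trailing unit axis and scalar weights become
# rank-3 tensors, so all four arguments broadcast elementwise against the
# logits. Only static shapes are inspected, so no session is needed.
def _example_prepare_loss_args():
  labels = tf.ones([8, 5])
  logits = tf.zeros([8, 5, 3])
  labels, logits, pos, neg = prepare_loss_args(labels, logits, 2.0, 1.0)
  return (labels.get_shape().as_list(),  # [8, 5, 1]
          pos.get_shape().as_list())     # [1, 1, 1]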


def get_num_labels(labels_or_logits):
  """Returns the number of labels inferred from labels_or_logits."""
  if labels_or_logits.get_shape().ndims <= 1:
    return 1
  return labels_or_logits.get_shape()[1].value