Spaces:
Sleeping
Sleeping
# Copyright 2018 The TensorFlow Global Objectives Authors. All Rights Reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# ============================================================================== | |
"""Contains utility functions for the global objectives library.""" | |
# Dependency imports | |
import tensorflow as tf | |
def weighted_sigmoid_cross_entropy_with_logits(labels,
                                               logits,
                                               positive_weights=1.0,
                                               negative_weights=1.0,
                                               name=None):
  """Computes a weighting of sigmoid cross entropy given `logits`.

  Measures the weighted probability error in discrete classification tasks in
  which classes are independent and not mutually exclusive. For instance, one
  could perform multilabel classification where a picture can contain both an
  elephant and a dog at the same time. The class weight multiplies the
  different types of errors.

  For brevity, let `x = logits`, `z = labels`, `c = positive_weights`,
  `d = negative_weights`. The weighted logistic loss is

  ```
  c * z * -log(sigmoid(x)) + d * (1 - z) * -log(1 - sigmoid(x))
  = c * z * -log(1 / (1 + exp(-x))) - d * (1 - z) * log(exp(-x) / (1 + exp(-x)))
  = c * z * log(1 + exp(-x)) + d * (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
  = c * z * log(1 + exp(-x)) + d * (1 - z) * (x + log(1 + exp(-x)))
  = (1 - z) * x * d + (d * (1 - z) + c * z) * log(1 + exp(-x))
  = - d * x * z + d * x + (d - d * z + c * z) * log(1 + exp(-x))
  ```

  To ensure stability and avoid overflow, the implementation uses the identity
      log(1 + exp(-x)) = max(0, -x) + log(1 + exp(-abs(x)))
  and the result is computed as

  ```
  = -d * x * z + d * x
    + (d - d * z + c * z) * (max(0, -x) + log1p(exp(-abs(x))))
  ```

  Note that the loss is NOT an upper bound on the 0-1 loss, unless it is divided
  by log(2).

  Args:
    labels: A `Tensor` of type `float32` or `float64`. `labels` can be a 2D
      tensor with shape [batch_size, num_labels] or a 3D tensor with shape
      [batch_size, num_labels, K].
    logits: A `Tensor` of the same type and shape as `labels`. If `logits` has
      shape [batch_size, num_labels, K], the loss is computed separately on each
      slice [:, :, k] of `logits`.
    positive_weights: A `Tensor` that holds positive weights and has the
      following semantics according to its shape:
        scalar - A global positive weight.
        1D tensor - must be of size K, a weight for each 'attempt'
        2D tensor - of size [num_labels, K'] where K' is either K or 1.
      The `positive_weights` will be expanded to the left to match the
      dimensions of logits and labels.
    negative_weights: A `Tensor` that holds the negative weights and has
      shape semantics identical to `positive_weights`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise
    weighted logistic losses.
  """
  with tf.name_scope(
      name,
      'weighted_logistic_loss',
      [logits, labels, positive_weights, negative_weights]) as name:
    labels, logits, positive_weights, negative_weights = prepare_loss_args(
        labels, logits, positive_weights, negative_weights)

    # Stable evaluation of log(1 + exp(-x)). `log1p` keeps the contribution of
    # exp(-|x|) when it is far smaller than 1, where `log(1.0 + ...)` would
    # round the argument to exactly 1 and return 0.
    softplus_term = tf.add(tf.maximum(-logits, 0.0),
                           tf.log1p(tf.exp(-tf.abs(logits))))
    # Equals d - d * z + c * z from the derivation in the docstring.
    weight_dependent_factor = (
        negative_weights + (positive_weights - negative_weights) * labels)
    return (negative_weights * (logits - labels * logits) +
            weight_dependent_factor * softplus_term)
def weighted_hinge_loss(labels,
                        logits,
                        positive_weights=1.0,
                        negative_weights=1.0,
                        name=None):
  """Returns the elementwise weighted hinge loss for the given `logits`.

  Intended for multi-label classification where the labels are independent
  and not mutually exclusive. The cross-entropy counterpart is
  `weighted_sigmoid_cross_entropy_with_logits`.

  Args:
    labels: A `Tensor` of type `float32` or `float64`. Each entry must be
      either 0 or 1. `labels` can be a 2D tensor with shape
      [batch_size, num_labels] or a 3D tensor with shape
      [batch_size, num_labels, K].
    logits: A `Tensor` of the same type and shape as `labels`. If `logits` has
      shape [batch_size, num_labels, K], the loss is computed separately on each
      slice [:, :, k] of `logits`.
    positive_weights: A `Tensor` that holds positive weights and has the
      following semantics according to its shape:
        scalar - A global positive weight.
        1D tensor - must be of size K, a weight for each 'attempt'
        2D tensor - of size [num_labels, K'] where K' is either K or 1.
      The `positive_weights` will be expanded to the left to match the
      dimensions of logits and labels.
    negative_weights: A `Tensor` that holds the negative weights and has
      shape semantics identical to `positive_weights`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise
    weighted hinge loss.
  """
  scope_inputs = [logits, labels, positive_weights, negative_weights]
  with tf.name_scope(name, 'weighted_hinge_loss', scope_inputs) as name:
    labels, logits, positive_weights, negative_weights = prepare_loss_args(
        labels, logits, positive_weights, negative_weights)

    # Positives are penalised when the logit falls below +1.
    loss_on_positives = (
        positive_weights * labels * tf.maximum(1.0 - logits, 0))
    # Negatives are penalised when the logit rises above -1.
    loss_on_negatives = (
        negative_weights * (1.0 - labels) * tf.maximum(1.0 + logits, 0))
    return loss_on_positives + loss_on_negatives
def weighted_surrogate_loss(labels,
                            logits,
                            surrogate_type='xent',
                            positive_weights=1.0,
                            negative_weights=1.0,
                            name=None):
  """Dispatches to the weighted cross-entropy or weighted hinge loss.

  With `surrogate_type='xent'` the weighted sigmoid cross-entropy loss is
  returned; with `surrogate_type='hinge'` the weighted hinge loss is returned.

  Args:
    labels: A `Tensor` of type `float32` or `float64`. Each entry must be
      between 0 and 1. `labels` can be a 2D tensor with shape
      [batch_size, num_labels] or a 3D tensor with shape
      [batch_size, num_labels, K].
    logits: A `Tensor` of the same type and shape as `labels`. If `logits` has
      shape [batch_size, num_labels, K], each slice [:, :, k] represents an
      'attempt' to predict `labels` and the loss is computed per slice.
    surrogate_type: A string that determines which loss to return, supports
      'xent' for cross-entropy and 'hinge' for hinge loss.
    positive_weights: A `Tensor` that holds positive weights and has the
      following semantics according to its shape:
        scalar - A global positive weight.
        1D tensor - must be of size K, a weight for each 'attempt'
        2D tensor - of size [num_labels, K'] where K' is either K or 1.
      The `positive_weights` will be expanded to the left to match the
      dimensions of logits and labels.
    negative_weights: A `Tensor` that holds the negative weights and has
      shape semantics identical to `positive_weights`.
    name: A name for the operation (optional).

  Returns:
    The weighted loss.

  Raises:
    ValueError: If value of `surrogate_type` is not supported.
  """
  scope_inputs = [
      logits, labels, surrogate_type, positive_weights, negative_weights]
  with tf.name_scope(name, 'weighted_loss', scope_inputs) as name:
    # Pick the implementation first so there is a single call site below.
    if surrogate_type == 'xent':
      loss_fn = weighted_sigmoid_cross_entropy_with_logits
    elif surrogate_type == 'hinge':
      loss_fn = weighted_hinge_loss
    else:
      raise ValueError('surrogate_type %s not supported.' % surrogate_type)

    return loss_fn(
        logits=logits,
        labels=labels,
        positive_weights=positive_weights,
        negative_weights=negative_weights,
        name=name)
def expand_outer(tensor, rank):
  """Prepends size-1 dimensions to `tensor` until it has `rank` dimensions.

  For example, with rank = 3 and tensor.shape equal to [3, 4], the returned
  tensor has shape [1, 3, 4].

  Args:
    tensor: The tensor to expand.
    rank: The target number of dimensions.

  Returns:
    The expanded tensor.

  Raises:
    ValueError: If rank of `tensor` is unknown, or if `rank` is smaller than
      the rank of `tensor`.
  """
  static_shape = tensor.get_shape()
  if static_shape.ndims is None:
    raise ValueError('tensor dimension must be known.')

  current_rank = len(static_shape)
  if current_rank > rank:
    raise ValueError(
        '`rank` must be at least the current tensor dimension: (%s vs %s).' %
        (rank, current_rank))

  # Each expand_dims at axis 0 adds exactly one leading dimension.
  for _ in range(rank - current_rank):
    tensor = tf.expand_dims(tensor, 0)
  return tensor
def build_label_priors(labels,
                       weights=None,
                       positive_pseudocount=1.0,
                       negative_pseudocount=1.0,
                       variables_collections=None):
  """Builds an op that accumulates and reports label prior probabilities.

  For each label, the label priors are estimated as
      (P + sum_i w_i y_i) / (P + N + sum_i w_i),
  where y_i is the ith label, w_i is the ith weight, P is a pseudo-count of
  positive labels, and N is a pseudo-count of negative labels. The index i
  ranges over all labels observed during all evaluations of the returned op.

  Args:
    labels: A `Tensor` with shape [batch_size, num_labels]. Entries should be
      in [0, 1].
    weights: Coefficients representing the weight of each label. Must be either
      a Tensor of shape [batch_size, num_labels] or `None`, in which case each
      weight is treated as 1.0.
    positive_pseudocount: Number of positive labels used to initialize the label
      priors.
    negative_pseudocount: Number of negative labels used to initialize the label
      priors.
    variables_collections: Optional list of collections for created variables.

  Returns:
    label_priors: An op to update the weighted label_priors. Gives the
      current value of the label priors when evaluated.
  """
  counts_dtype = labels.dtype.base_dtype
  label_count = get_num_labels(labels)

  if weights is None:
    weights = tf.ones_like(labels)

  # The variables below are updated with assign_add, which is not available
  # for partitioned variables, so partitioning is switched off while they are
  # created and restored afterwards even if creation fails.
  variable_scope = tf.get_variable_scope()
  saved_partitioner = variable_scope.partitioner
  try:
    variable_scope.set_partitioner(None)

    # Running numerator: P + sum_i w_i y_i, one entry per label.
    positive_mass = tf.contrib.framework.model_variable(
        name='weighted_label_counts',
        shape=[label_count],
        dtype=counts_dtype,
        initializer=tf.constant_initializer(
            [positive_pseudocount] * label_count, dtype=counts_dtype),
        collections=variables_collections,
        trainable=False)
    positive_mass_updated = positive_mass.assign_add(
        tf.reduce_sum(weights * labels, 0))

    # Running denominator: P + N + sum_i w_i, one entry per label.
    total_mass = tf.contrib.framework.model_variable(
        name='weight_sum',
        shape=[label_count],
        dtype=counts_dtype,
        initializer=tf.constant_initializer(
            [positive_pseudocount + negative_pseudocount] * label_count,
            dtype=counts_dtype),
        collections=variables_collections,
        trainable=False)
    total_mass_updated = total_mass.assign_add(tf.reduce_sum(weights, 0))
  finally:
    variable_scope.set_partitioner(saved_partitioner)

  # Dividing the update ops means evaluating the result also applies both
  # accumulations.
  return tf.div(positive_mass_updated, total_mass_updated)
def convert_and_cast(value, name, dtype):
  """Converts `value` to a tensor and casts the result to `dtype`.

  Args:
    value: An object whose type has a registered Tensor conversion function,
      e.g. python numerical type or numpy array.
    name: Name to use for the new Tensor, if one is created.
    dtype: Element type of the returned tensor.

  Returns:
    A tensor.
  """
  as_tensor = tf.convert_to_tensor(value, name=name)
  return tf.cast(as_tensor, dtype=dtype)
def prepare_loss_args(labels, logits, positive_weights, negative_weights):
  """Normalizes the arguments shared by the weighted loss functions.

  `logits` is converted to a tensor; `labels` and both weights are converted
  and cast to the dtype of `logits`. If `labels` has rank 2 while `logits` has
  rank 3, a trailing dimension is added to `labels`. Both weights are expanded
  on the left until they have the same rank as `logits`.

  Args:
    labels: Labels of the loss function.
    logits: Logits of the loss function.
    positive_weights: Weight on the positive examples.
    negative_weights: Weight on the negative examples.

  Returns:
    Converted labels, logits, positive_weights, negative_weights.
  """
  logits = tf.convert_to_tensor(logits, name='logits')
  target_dtype = logits.dtype

  labels = convert_and_cast(labels, 'labels', target_dtype)
  if len(labels.get_shape()) == 2 and len(logits.get_shape()) == 3:
    # Add a trailing axis so the labels line up with the 3D logits.
    labels = tf.expand_dims(labels, [2])

  target_rank = logits.get_shape().ndims
  positive_weights = expand_outer(
      convert_and_cast(positive_weights, 'positive_weights', target_dtype),
      target_rank)
  negative_weights = expand_outer(
      convert_and_cast(negative_weights, 'negative_weights', target_dtype),
      target_rank)
  return labels, logits, positive_weights, negative_weights
def get_num_labels(labels_or_logits):
  """Returns the number of labels inferred from labels_or_logits.

  Args:
    labels_or_logits: A tensor-like object exposing `get_shape()`. When it has
      more than one dimension, dimension 1 is taken as the label dimension.

  Returns:
    1 if the static rank is 0 or 1; otherwise the static size of dimension 1,
    which is `None` when that size is not statically known.

  Raises:
    ValueError: If the rank of `labels_or_logits` is unknown.
  """
  static_shape = labels_or_logits.get_shape()
  if static_shape.ndims is None:
    # Without this check `None <= 1` raises an opaque TypeError on Python 3.
    raise ValueError('tensor dimension must be known.')
  if static_shape.ndims <= 1:
    return 1
  label_dim = static_shape[1]
  # Dimension objects carry their size in `.value`; plain ints (or None) are
  # already the size and pass through unchanged.
  return getattr(label_dim, 'value', label_dim)