# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library with adversarial attacks. | |
This library designed to be self-contained and have no dependencies other | |
than TensorFlow. It only contains PGD / Iterative FGSM attacks, | |
see https://arxiv.org/abs/1706.06083 and https://arxiv.org/abs/1607.02533 | |
for details. | |
For wider set of adversarial attacks refer to Cleverhans library: | |
https://github.com/tensorflow/cleverhans | |
""" | |
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


def generate_pgd_common(x,
                        bounds,
                        model_fn,
                        attack_params,
                        one_hot_labels,
                        perturbation_multiplier):
  """Common code for generating PGD adversarial examples.

  Args:
    x: original examples.
    bounds: tuple with bounds of image values, bounds[0] < bounds[1].
    model_fn: model function with signature model_fn(images).
    attack_params: parameters of the attack.
    one_hot_labels: one hot label vector to use in the loss.
    perturbation_multiplier: multiplier of adversarial perturbation, either
      +1.0 (gradient ascent on the loss, used by the non-targeted attack) or
      -1.0 (gradient descent toward the target class, used by the targeted
      attacks).

  Returns:
    Tensor with adversarial examples.

  Raises:
    ValueError: if attack parameters are invalid.
  """
  # Parse attack_params. Expected format: 'EPS_STEP_NITER', where EPS is
  # epsilon, STEP is the step size and NITER is the number of iterations.
  params_list = attack_params.split('_')
  if len(params_list) != 3:
    raise ValueError('Invalid parameters of PGD attack: %s' % attack_params)
  epsilon = int(params_list[0])
  step_size = int(params_list[1])
  niter = int(params_list[2])
  # Rescale epsilon and step size from the [0, 255] range to image bounds.
  epsilon = float(epsilon) / 255.0 * (bounds[1] - bounds[0])
  step_size = float(step_size) / 255.0 * (bounds[1] - bounds[0])
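  # For example, attack_params='16_2_10' with bounds=(0.0, 1.0) yields
  # epsilon = 16 / 255 ~= 0.0627, step_size = 2 / 255 ~= 0.0078 and
  # niter = 10.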
  # Clipping boundaries.
  clip_min = tf.maximum(x - epsilon, bounds[0])
  clip_max = tf.minimum(x + epsilon, bounds[1])
  # Compute a random starting point within the epsilon ball.
  start_x = x + tf.random_uniform(tf.shape(x), -epsilon, epsilon)
  start_x = tf.clip_by_value(start_x, clip_min, clip_max)
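
  # Each iteration below applies
  #   x_{t+1} = clip(x_t + perturbation_multiplier * step_size *
  #                  sign(grad_x loss(x_t))),
  # where clip(.) projects onto the intersection of the epsilon ball around
  # x and the valid image range [bounds[0], bounds[1]].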
  # Main iteration of PGD.
  loop_vars = [0, start_x]

  def loop_cond(index, _):
    return index < niter

  def loop_body(index, adv_images):
    logits = model_fn(adv_images)
    loss = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=one_hot_labels,
            logits=logits))
    perturbation = step_size * tf.sign(tf.gradients(loss, adv_images)[0])
    new_adv_images = adv_images + perturbation_multiplier * perturbation
    new_adv_images = tf.clip_by_value(new_adv_images, clip_min, clip_max)
    return index + 1, new_adv_images

  with tf.control_dependencies([start_x]):
    _, result = tf.while_loop(
        loop_cond,
        loop_body,
        loop_vars,
        back_prop=False,
        parallel_iterations=1)
  return result


def generate_pgd_ll(x, bounds, model_fn, attack_params):
  # pylint: disable=g-doc-args
  """Generates targeted PGD adversarial examples with least likely target class.

  See generate_pgd_common for description of arguments.

  Returns:
    Tensor with adversarial examples.
  """
  # pylint: enable=g-doc-args
  # Compute one hot encoding of the least likely class; reuse the logits
  # instead of calling model_fn a second time.
  logits = model_fn(x)
  num_classes = tf.shape(logits)[1]
  one_hot_labels = tf.one_hot(tf.argmin(logits, axis=1), num_classes)
  return generate_pgd_common(x, bounds, model_fn, attack_params,
                             one_hot_labels=one_hot_labels,
                             perturbation_multiplier=-1.0)


def generate_pgd_rand(x, bounds, model_fn, attack_params):
  # pylint: disable=g-doc-args
  """Generates targeted PGD adversarial examples with random target class.

  See generate_pgd_common for description of arguments.

  Returns:
    Tensor with adversarial examples.
  """
  # pylint: enable=g-doc-args
  # Compute one hot encoding of a uniformly random class.
  logits = model_fn(x)
  batch_size = tf.shape(logits)[0]
  num_classes = tf.shape(logits)[1]
  random_labels = tf.random_uniform(shape=[batch_size],
                                    minval=0,
                                    maxval=num_classes,
                                    dtype=tf.int32)
  one_hot_labels = tf.one_hot(random_labels, num_classes)
  return generate_pgd_common(x, bounds, model_fn, attack_params,
                             one_hot_labels=one_hot_labels,
                             perturbation_multiplier=-1.0)


def generate_pgd(x, bounds, model_fn, attack_params):
  # pylint: disable=g-doc-args
  """Generates non-targeted PGD adversarial examples.

  See generate_pgd_common for description of arguments.

  Returns:
    Tensor with adversarial examples.
  """
  # pylint: enable=g-doc-args
  # Compute one hot encoding of the predicted class; reuse the logits
  # instead of calling model_fn a second time.
  logits = model_fn(x)
  num_classes = tf.shape(logits)[1]
  one_hot_labels = tf.one_hot(tf.argmax(logits, axis=1), num_classes)
  return generate_pgd_common(x, bounds, model_fn, attack_params,
                             one_hot_labels=one_hot_labels,
                             perturbation_multiplier=1.0)


def generate_adversarial_examples(x, bounds, model_fn, attack_description):
  """Generates adversarial examples.

  Args:
    x: original examples.
    bounds: tuple with bounds of image values, bounds[0] < bounds[1].
    model_fn: model function with signature model_fn(images).
    attack_description: string which describes an attack, see notes below for
      details.

  Returns:
    Tensor with adversarial examples.

  Raises:
    ValueError: if attack description is invalid.

  The attack description can be one of the following strings:
  - "clean" - no attack, return original images.
  - "pgd_EPS_STEP_NITER" - non-targeted PGD attack.
  - "pgdll_EPS_STEP_NITER" - targeted PGD attack with least likely target
    class.
  - "pgdrnd_EPS_STEP_NITER" - targeted PGD attack with random target class.

  The meaning of the attack parameters is as follows:
  - EPS - maximum size of adversarial perturbation, between 0 and 255.
  - STEP - step size of one iteration of PGD, between 0 and 255.
  - NITER - number of iterations.
  """
  if attack_description == 'clean':
    return x
  idx = attack_description.find('_')
  if idx < 0:
    raise ValueError('Invalid value of attack description %s'
                     % attack_description)
  attack_name = attack_description[:idx]
  attack_params = attack_description[idx + 1:]
  if attack_name == 'pgdll':
    return generate_pgd_ll(x, bounds, model_fn, attack_params)
  elif attack_name == 'pgdrnd':
    return generate_pgd_rand(x, bounds, model_fn, attack_params)
  elif attack_name == 'pgd':
    return generate_pgd(x, bounds, model_fn, attack_params)
  else:
    raise ValueError('Invalid value of attack description %s'
                     % attack_description)
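

# A minimal usage sketch, not part of the library above: it wires a toy
# one-layer classifier (a hypothetical stand-in; any model_fn that returns
# logits would work) into the non-targeted PGD attack on random images.
if __name__ == '__main__':

  def toy_model_fn(images):
    """Toy classifier: flattens images and applies a single dense layer."""
    flat = tf.reshape(images, [-1, 28 * 28])
    # AUTO_REUSE shares the layer weights across the repeated model_fn calls
    # made by the attack (once for the labels, once per PGD iteration).
    return tf.layers.dense(flat, 10, name='toy_logits', reuse=tf.AUTO_REUSE)

  images = tf.random_uniform([4, 28, 28, 1], minval=0.0, maxval=1.0)
  # 'pgdll_16_2_10' or 'pgdrnd_16_2_10' would select the targeted variants.
  adv_images = generate_adversarial_examples(
      images, bounds=(0.0, 1.0), model_fn=toy_model_fn,
      attack_description='pgd_16_2_10')
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print('Adversarial batch shape:', sess.run(adv_images).shape)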