Commit 3eebbc1
Parent(s): 44645f1

Adding fawkes updated files

Files changed:
- app.py                    +13 -36
- fawkes/__init__.py        +19 -0
- fawkes/align_face.py      +80 -0
- fawkes/differentiator.py  +300 -0
- fawkes/protection.py      +197 -0
- fawkes/utils.py           +731 -0
app.py CHANGED
@@ -1,38 +1,22 @@
 import gradio as gr
-import glob
-import os
-from PIL import Image
-import numpy as np
-from fawkes.protection import Fawkes # Make sure the import path is correct
 
+from fawkes.protection import Fawkes
 
-
-
-
+
+def run_protection_interface(uploaded_image, mode='low', sd=1e6, format='png', separate_target=False, no_align=False,
+                             debug=False):
     """
    Gradio compatible function for running protection.
    """
     if uploaded_image is None:
         return None, "No image uploaded."
 
-    # Save the uploaded image to a temporary directory
-    temp_dir = "temp_imgs"
-    os.makedirs(temp_dir, exist_ok=True)
-    img_path = os.path.join(temp_dir, "uploaded_image.png")
-    uploaded_image.save(img_path)
-
     # Run the protection process
-    protector = Fawkes(
-
-
-        max_step=max_step, batch_size=batch_size,
-        format=format, separate_target=separate_target,
-        debug=debug, no_align=no_align)
+    protector = Fawkes(gpu="0", batch_size=1, mode=mode)
+    processed_image = protector.run_protection(uploaded_image, sd=sd, batch_size=1, format=format,
+                                               separate_target=separate_target, debug=debug, no_align=no_align)
 
-
-    processed_img_path = img_path.replace(".png", "_cloaked.png")
-    if os.path.exists(processed_img_path):
-        processed_image = Image.open(processed_img_path)
+    if processed_image is not None:
         return processed_image, "Protection process completed."
     else:
         return None, "Protection process failed or no cloaked image generated."
@@ -46,18 +30,12 @@ with gr.Blocks() as demo:
 
     with gr.Column():
         gr.Markdown("### Configuration Options")
-
-
-        feature_extractor = gr.Textbox(label="Feature Extractor", value="arcface_extractor_0")
-        th = gr.Slider(label="Threshold", minimum=0.001, maximum=0.05, value=0.01)
-        max_step = gr.Slider(label="Max Steps", minimum=500, maximum=2000, value=1000)
-        sd = gr.Slider(label="Penalty Number (SD)", minimum=1e5, maximum=1e7, value=1e6)
-        lr = gr.Slider(label="Learning Rate", minimum=1, maximum=25, value=2)
-        batch_size = gr.Slider(label="Batch Size", minimum=1, maximum=10, value=1)
-        format = gr.Radio(label="Output Format", choices=['png', 'jpg', 'jpeg'])
+        mode = gr.Radio(label="Mode", choices=['low', 'mid', 'high'], value='low')
+        format = gr.Radio(label="Output Format", choices=['png', 'jpg', 'jpeg'], value='png')
         separate_target = gr.Checkbox(label="Separate Target")
         no_align = gr.Checkbox(label="No Align")
-
+        with gr.Accordion(label='Advanced Config', open=False):
+            sd = gr.Slider(label="Penalty Number (SD)", minimum=1e5, maximum=1e7, value=1e6)
 
     run_button = gr.Button("Run Protection")
     output_image = gr.Image(label="Processed Image")
@@ -65,8 +43,7 @@
 
     run_button.click(
         fn=run_protection_interface,
-        inputs=[uploaded_image,
-                separate_target, no_align, debug],
+        inputs=[uploaded_image, mode, sd, format, separate_target, no_align],
         outputs=[output_image, output_text]
     )
 
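A detail worth noting in the reworked click wiring: Gradio passes component values to `fn` positionally, so the order of `inputs` must line up with the parameter order of `run_protection_interface`; since `debug` is no longer in the list, it always takes its default of `False`. A minimal sketch of the same pattern, assuming only that `gradio` is installed (the echo handler body is a stand-in for illustration, not the commit's function):

    import gradio as gr

    # Stand-in handler: parameter order must match the inputs list below.
    def handler(img, mode='low', sd=1e6, fmt='png', sep=False, no_align=False, debug=False):
        return img, "mode={} debug={}".format(mode, debug)  # debug stays False

    with gr.Blocks() as demo:
        img = gr.Image(type="pil")
        mode = gr.Radio(choices=['low', 'mid', 'high'], value='low')
        sd = gr.Slider(minimum=1e5, maximum=1e7, value=1e6)
        fmt = gr.Radio(choices=['png', 'jpg', 'jpeg'], value='png')
        sep = gr.Checkbox(label="Separate Target")
        no_align = gr.Checkbox(label="No Align")
        out_img = gr.Image()
        out_txt = gr.Textbox()
        btn = gr.Button("Run Protection")
        # Values arrive at handler in exactly this order.
        btn.click(fn=handler, inputs=[img, mode, sd, fmt, sep, no_align],
                  outputs=[out_img, out_txt])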
fawkes/__init__.py ADDED
@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+# @Date    : 2020-07-01
+# @Author  : Shawn Shan (shansixiong@cs.uchicago.edu)
+# @Link    : https://www.shawnshan.com/
+
+
+__version__ = '1.0.2'
+
+from .differentiator import FawkesMaskGeneration
+from .protection import main, Fawkes
+from .utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked, Faces, get_file
+
+__all__ = (
+    '__version__',
+    'FawkesMaskGeneration', 'load_extractor',
+    'init_gpu',
+    'select_target_label', 'dump_image', 'reverse_process_cloaked',
+    'Faces', 'get_file', 'main', 'Fawkes'
+)
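With these re-exports in place, downstream code can import the public API from the package root instead of the individual submodules. A small sketch, assuming the fawkes package from this commit is installed (importing it pulls in TensorFlow, so this is not free):

    # Both styles resolve to the same objects after this commit.
    import fawkes
    from fawkes import Fawkes, load_extractor, init_gpu

    print(fawkes.__version__)  # '1.0.2'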
fawkes/align_face.py ADDED
@@ -0,0 +1,80 @@
+import numpy as np
+from mtcnn import MTCNN
+
+
+def to_rgb(img):
+    w, h = img.shape
+    ret = np.empty((w, h, 3), dtype=np.uint8)
+    ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
+    return ret
+
+
+def aligner():
+    return MTCNN(min_face_size=30)
+
+
+def align(orig_img, aligner):
+    """ run MTCNN face detector """
+
+    if orig_img.ndim < 2:
+        return None
+    if orig_img.ndim == 2:
+        orig_img = to_rgb(orig_img)
+    orig_img = orig_img[:, :, 0:3]
+
+    detect_results = aligner.detect_faces(orig_img)
+    cropped_arr = []
+    bounding_boxes_arr = []
+    for dic in detect_results:
+        if dic['confidence'] < 0.9:
+            continue
+        x, y, width, height = dic['box']
+
+        if width < 30 or height < 30:
+            continue
+        bb = [y, x, y + height, x + width]
+        cropped = orig_img[bb[0]:bb[2], bb[1]:bb[3], :]
+        cropped_arr.append(np.copy(cropped))
+        bounding_boxes_arr.append(bb)
+
+    return cropped_arr, bounding_boxes_arr
+
+    # if nrof_faces > 0:
+    #     det = bounding_boxes[0]['box']
+    #     det_arr = []
+    #     img_size = np.asarray(orig_img.shape)[0:2]
+    #     if nrof_faces > 1:
+    #         margin = margin / 1.5
+    #         if detect_multiple_faces:
+    #             for i in range(nrof_faces):
+    #                 det_arr.append(np.squeeze(bounding_boxes[i]['box']))
+    #         else:
+    #             bounding_box_size = (det[1] + det[3])
+    #             img_center = img_size / 2
+    #             offsets = np.vstack([(det[0] + det[2]) / 2 - img_center[1],
+    #                                  (det[1] + det[3]) / 2 - img_center[0]])
+    #             offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
+    #             index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)  # some extra weight on the centering
+    #             det_arr.append(det[index, :])
+    #     else:
+    #         det_arr.append(np.squeeze(det))
+    #
+    #     cropped_arr = []
+    #     bounding_boxes_arr = []
+    #     for i, det in enumerate(det_arr):
+    #         det = np.squeeze(det)
+    #         bb = np.zeros(4, dtype=np.int32)
+    #         # add in margin
+    #         marg1 = int((det[2] - det[0]) * margin)
+    #         marg2 = int((det[3] - det[1]) * margin)
+    #
+    #         bb[0] = max(det[0] - marg1 / 2, 0)
+    #         bb[1] = max(det[1] - marg2 / 2, 0)
+    #         bb[2] = min(det[0] + det[2] + marg1 / 2, img_size[0])
+    #         bb[3] = min(det[1] + det[3] + marg2 / 2, img_size[1])
+    #         cropped = orig_img[bb[0]:bb[2], bb[1]: bb[3], :]
+    #         cropped_arr.append(cropped)
+    #         bounding_boxes_arr.append([bb[0], bb[1], bb[2], bb[3]])
+    #     return cropped_arr, bounding_boxes_arr
+    # else:
+    #     return None
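`align` returns two parallel lists: the cropped face pixels and their `[y1, x1, y2, x2]` bounding boxes in the original image, skipping detections below 0.9 confidence or smaller than 30 px on a side. A quick sketch of driving it directly, assuming `mtcnn` is installed and `photo.jpg` is a hypothetical path to a face photo:

    import numpy as np
    from PIL import Image
    from fawkes.align_face import aligner, align

    detector = aligner()  # MTCNN with min_face_size=30
    img = np.array(Image.open("photo.jpg").convert("RGB"))
    crops, boxes = align(img, detector)
    # boxes are [y1, x1, y2, x2] in original-image coordinates
    for crop, (y1, x1, y2, x2) in zip(crops, boxes):
        print("face at ({}, {}) size {}x{}".format(x1, y1, x2 - x1, y2 - y1))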
fawkes/differentiator.py ADDED
@@ -0,0 +1,300 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Date    : 2020-10-21
+# @Author  : Emily Wenger (ewenger@uchicago.edu)
+
+import datetime
+import time
+
+import numpy as np
+import tensorflow as tf
+from fawkes.utils import preprocess, reverse_preprocess
+from keras.utils import Progbar
+
+
+class FawkesMaskGeneration:
+    # if the attack is trying to mimic a target image or a neuron vector
+    MIMIC_IMG = True
+    # number of iterations to perform gradient descent
+    MAX_ITERATIONS = 10000
+    # larger values converge faster to less accurate results
+    LEARNING_RATE = 1e-2
+    # the initial constant c to pick as a first guess
+    INITIAL_CONST = 1
+    # pixel intensity range
+    INTENSITY_RANGE = 'imagenet'
+    # threshold for distance
+    L_THRESHOLD = 0.03
+    # whether keep the final result or the best result
+    KEEP_FINAL = False
+    # max_val of image
+    MAX_VAL = 255
+    MAXIMIZE = False
+    IMAGE_SHAPE = (112, 112, 3)
+    RATIO = 1.0
+    LIMIT_DIST = False
+    LOSS_TYPE = 'features'  # use features (original Fawkes) or gradients (Witches Brew) to run Fawkes?
+
+    def __init__(self, bottleneck_model_ls, mimic_img=MIMIC_IMG,
+                 batch_size=1, learning_rate=LEARNING_RATE,
+                 max_iterations=MAX_ITERATIONS, initial_const=INITIAL_CONST,
+                 intensity_range=INTENSITY_RANGE, l_threshold=L_THRESHOLD,
+                 max_val=MAX_VAL, keep_final=KEEP_FINAL, maximize=MAXIMIZE, image_shape=IMAGE_SHAPE, verbose=1,
+                 ratio=RATIO, limit_dist=LIMIT_DIST, loss_method=LOSS_TYPE, tanh_process=True,
+                 save_last_on_failed=True):
+
+        assert intensity_range in {'raw', 'imagenet', 'inception', 'mnist'}
+
+        # constant used for tanh transformation to avoid corner cases
+
+        self.it = 0
+        self.tanh_constant = 2 - 1e-6
+        self.save_last_on_failed = save_last_on_failed
+        self.MIMIC_IMG = mimic_img
+        self.LEARNING_RATE = learning_rate
+        self.MAX_ITERATIONS = max_iterations
+        self.initial_const = initial_const
+        self.batch_size = batch_size
+        self.intensity_range = intensity_range
+        self.l_threshold = l_threshold
+        self.max_val = max_val
+        self.keep_final = keep_final
+        self.verbose = verbose
+        self.maximize = maximize
+        self.learning_rate = learning_rate
+        self.ratio = ratio
+        self.limit_dist = limit_dist
+        self.single_shape = list(image_shape)
+        self.bottleneck_models = bottleneck_model_ls
+        self.loss_method = loss_method
+        self.tanh_process = tanh_process
+
+    @staticmethod
+    def resize_tensor(input_tensor, model_input_shape):
+        if input_tensor.shape[1:] == model_input_shape or model_input_shape[1] is None:
+            return input_tensor
+        resized_tensor = tf.image.resize(input_tensor, model_input_shape[:2])
+        return resized_tensor
+
+    def preprocess_arctanh(self, imgs):
+        """ Do tan preprocess """
+        imgs = reverse_preprocess(imgs, self.intensity_range)
+        imgs = imgs / 255.0
+        imgs = imgs - 0.5
+        imgs = imgs * self.tanh_constant
+        tanh_imgs = np.arctanh(imgs)
+        return tanh_imgs
+
+    def reverse_arctanh(self, imgs):
+        raw_img = (tf.tanh(imgs) / self.tanh_constant + 0.5) * 255
+        return raw_img
+
+    def input_space_process(self, img):
+        if self.intensity_range == 'imagenet':
+            mean = np.repeat([[[[103.939, 116.779, 123.68]]]], len(img), axis=0)
+            raw_img = (img[..., ::-1] - mean)
+        else:
+            raw_img = img
+        return raw_img
+
+    def clipping(self, imgs):
+        imgs = reverse_preprocess(imgs, self.intensity_range)
+        imgs = np.clip(imgs, 0, self.max_val)
+        imgs = preprocess(imgs, self.intensity_range)
+        return imgs
+
+    def calc_dissim(self, source_raw, source_mod_raw):
+        msssim_split = tf.image.ssim(source_raw, source_mod_raw, max_val=255.0)
+        dist_raw = (1.0 - tf.stack(msssim_split)) / 2.0
+        dist = tf.maximum(dist_raw - self.l_threshold, 0.0)
+        dist_raw_avg = tf.reduce_mean(dist_raw)
+        dist_sum = tf.reduce_sum(dist)
+
+        return dist, dist_raw, dist_sum, dist_raw_avg
+
+    def calc_bottlesim(self, tape, source_raw, target_raw, original_raw):
+        """ original Fawkes loss function. """
+        bottlesim = 0.0
+        bottlesim_sum = 0.0
+        # make sure everything is the right size.
+        model_input_shape = self.single_shape
+        cur_aimg_input = self.resize_tensor(source_raw, model_input_shape)
+        if target_raw is not None:
+            cur_timg_input = self.resize_tensor(target_raw, model_input_shape)
+        for bottleneck_model in self.bottleneck_models:
+            if tape is not None:
+                try:
+                    tape.watch(bottleneck_model.model.variables)
+                except AttributeError:
+                    tape.watch(bottleneck_model.variables)
+            # get the respective feature space reprs.
+            bottleneck_a = bottleneck_model(cur_aimg_input)
+            if self.maximize:
+                bottleneck_s = bottleneck_model(original_raw)
+                bottleneck_diff = bottleneck_a - bottleneck_s
+                scale_factor = tf.sqrt(tf.reduce_sum(tf.square(bottleneck_s), axis=1))
+            else:
+                bottleneck_t = bottleneck_model(cur_timg_input)
+                bottleneck_diff = bottleneck_t - bottleneck_a
+                scale_factor = tf.sqrt(tf.reduce_sum(tf.square(bottleneck_t), axis=1))
+            cur_bottlesim = tf.reduce_sum(tf.square(bottleneck_diff), axis=1)
+            cur_bottlesim = cur_bottlesim / scale_factor
+            bottlesim += cur_bottlesim
+            bottlesim_sum += tf.reduce_sum(cur_bottlesim)
+        return bottlesim, bottlesim_sum
+
+    def compute_feature_loss(self, tape, aimg_raw, simg_raw, aimg_input, timg_input, simg_input):
+        """ Compute input space + feature space loss.
+        """
+        input_space_loss, dist_raw, input_space_loss_sum, input_space_loss_raw_avg = self.calc_dissim(aimg_raw,
+                                                                                                      simg_raw)
+        feature_space_loss, feature_space_loss_sum = self.calc_bottlesim(tape, aimg_input, timg_input, simg_input)
+
+        if self.maximize:
+            loss = self.const * tf.square(input_space_loss) - feature_space_loss * self.const_diff
+        else:
+            if self.it < self.MAX_ITERATIONS:
+                loss = self.const * tf.square(input_space_loss) + 1000 * feature_space_loss
+
+        loss_sum = tf.reduce_sum(loss)
+        return loss_sum, feature_space_loss, input_space_loss_raw_avg, dist_raw
+
+    def compute(self, source_imgs, target_imgs=None):
+        """ Main function that runs cloak generation. """
+        start_time = time.time()
+        adv_imgs = []
+        for idx in range(0, len(source_imgs), self.batch_size):
+            print('processing image %d at %s' % (idx + 1, datetime.datetime.now()))
+            adv_img = self.compute_batch(source_imgs[idx:idx + self.batch_size],
+                                         target_imgs[idx:idx + self.batch_size] if target_imgs is not None else None)
+            adv_imgs.extend(adv_img)
+        elapsed_time = time.time() - start_time
+        print('protection cost %f s' % elapsed_time)
+        return np.array(adv_imgs)
+
+    def compute_batch(self, source_imgs, target_imgs=None, retry=True):
+        """ TF2 method to generate the cloak. """
+        # preprocess images.
+        global progressbar
+        nb_imgs = source_imgs.shape[0]
+
+        # make sure source/target images are an array
+        source_imgs = np.array(source_imgs, dtype=np.float32)
+        if target_imgs is not None:
+            target_imgs = np.array(target_imgs, dtype=np.float32)
+
+        # metrics to test
+        best_bottlesim = [0] * nb_imgs if self.maximize else [np.inf] * nb_imgs
+        best_adv = np.zeros(source_imgs.shape)
+
+        # convert to tanh-space
+        simg_tanh = self.preprocess_arctanh(source_imgs)
+        if target_imgs is not None:
+            timg_tanh = self.preprocess_arctanh(target_imgs)
+        self.modifier = tf.Variable(np.random.uniform(-1, 1, tuple([len(source_imgs)] + self.single_shape)) * 1e-4,
+                                    dtype=tf.float32)
+
+        # make the optimizer
+        optimizer = tf.keras.optimizers.legacy.Adadelta(float(self.learning_rate))
+        const_numpy = np.ones(len(source_imgs)) * self.initial_const
+        self.const = tf.Variable(const_numpy, dtype=np.float32)
+
+        const_diff_numpy = np.ones(len(source_imgs)) * 1.0
+        self.const_diff = tf.Variable(const_diff_numpy, dtype=np.float32)
+
+        # get the modifier
+        if self.verbose == 0:
+            progressbar = Progbar(
+                self.MAX_ITERATIONS, width=30, verbose=1
+            )
+        # watch relevant variables.
+        simg_tanh = tf.Variable(simg_tanh, dtype=np.float32)
+        simg_raw = tf.Variable(source_imgs, dtype=np.float32)
+        if target_imgs is not None:
+            timg_raw = tf.Variable(timg_tanh, dtype=np.float32)
+        # run the attack
+        outside_list = np.ones(len(source_imgs))
+        self.it = 0
+
+        while self.it < self.MAX_ITERATIONS:
+
+            self.it += 1
+            with tf.GradientTape(persistent=True) as tape:
+                tape.watch(self.modifier)
+                tape.watch(simg_tanh)
+
+                # Convert from tanh for DISSIM
+                aimg_raw = self.reverse_arctanh(simg_tanh + self.modifier)
+
+                actual_modifier = aimg_raw - simg_raw
+                actual_modifier = tf.clip_by_value(actual_modifier, -15.0, 15.0)
+                aimg_raw = simg_raw + actual_modifier
+
+                simg_raw = self.reverse_arctanh(simg_tanh)
+
+                # Convert further preprocess for bottleneck
+                aimg_input = self.input_space_process(aimg_raw)
+                if target_imgs is not None:
+                    timg_input = self.input_space_process(timg_raw)
+                else:
+                    timg_input = None
+                simg_input = self.input_space_process(simg_raw)
+
+                # get the feature space loss.
+                loss, internal_dist, input_dist_avg, dist_raw = self.compute_feature_loss(
+                    tape, aimg_raw, simg_raw, aimg_input, timg_input, simg_input)
+
+            # compute gradients
+            grad = tape.gradient(loss, [self.modifier])
+            if grad[0] is not None:
+                optimizer.apply_gradients(zip(grad, [self.modifier]))
+
+            if self.it == 1:
+                self.modifier = tf.Variable(self.modifier - tf.sign(grad[0]) * 0.01, dtype=tf.float32)
+
+            for e, (input_dist, feature_d, mod_img) in enumerate(zip(dist_raw, internal_dist, aimg_input)):
+                if e >= nb_imgs:
+                    break
+                input_dist = input_dist.numpy()
+                feature_d = feature_d.numpy()
+
+                if input_dist <= self.l_threshold * 0.9 and const_diff_numpy[e] <= 129:
+                    const_diff_numpy[e] *= 2
+                    if outside_list[e] == -1:
+                        const_diff_numpy[e] = 1
+                    outside_list[e] = 1
+                elif input_dist >= self.l_threshold * 1.1 and const_diff_numpy[e] >= 1 / 129:
+                    const_diff_numpy[e] /= 2
+
+                    if outside_list[e] == 1:
+                        const_diff_numpy[e] = 1
+                    outside_list[e] = -1
+                else:
+                    const_diff_numpy[e] = 1.0
+                    outside_list[e] = 0
+
+                if input_dist <= self.l_threshold * 1.1 and (
+                        (feature_d < best_bottlesim[e] and (not self.maximize)) or (
+                        feature_d > best_bottlesim[e] and self.maximize)):
+                    best_bottlesim[e] = feature_d
+                    best_adv[e] = mod_img
+
+            self.const_diff = tf.Variable(const_diff_numpy, dtype=np.float32)
+
+            if self.verbose == 1:
+                print("ITER {:0.2f} Total Loss: {:.2f} {:0.4f} raw; diff: {:.4f}".format(self.it, loss, input_dist_avg,
+                                                                                         np.mean(internal_dist)))
+
+            if self.verbose == 0:
+                progressbar.update(self.it)
+        if self.verbose == 1:
+            print("Final diff: {:.4f}".format(np.mean(best_bottlesim)))
+            print("\n")
+
+        if self.save_last_on_failed:
+            for e, diff in enumerate(best_bottlesim):
+                if diff < 0.3 and dist_raw[e] < 0.015 and internal_dist[e] > diff:
+                    best_adv[e] = aimg_input[e]
+
+        best_adv = self.clipping(best_adv[:nb_imgs])
+        return best_adv
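The perceptual budget in `calc_dissim` works by converting SSIM into a dissimilarity, DSSIM = (1 - SSIM) / 2, and penalizing only the excess over `l_threshold`: perturbations that stay under the budget contribute zero input-space loss, so the optimizer is free to spend the whole budget on moving the feature-space embedding. A standalone sketch of that computation on random images, assuming TensorFlow 2.x (the images and noise level are arbitrary):

    import tensorflow as tf

    l_threshold = 0.03
    a = tf.random.uniform((1, 112, 112, 3), 0, 255)
    b = tf.clip_by_value(a + tf.random.normal(a.shape, stddev=5.0), 0, 255)

    ssim = tf.image.ssim(a, b, max_val=255.0)
    dist_raw = (1.0 - ssim) / 2.0                   # DSSIM per image
    dist = tf.maximum(dist_raw - l_threshold, 0.0)  # loss only above the budget
    print(float(dist_raw[0]), float(dist[0]))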
fawkes/protection.py ADDED
@@ -0,0 +1,197 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Date    : 2020-05-17
+# @Author  : Shawn Shan (shansixiong@cs.uchicago.edu)
+# @Link    : https://www.shawnshan.com/
+
+import argparse
+import glob
+import logging
+import os
+import sys
+
+logging.getLogger('tensorflow').setLevel(logging.ERROR)
+os.environ["KMP_AFFINITY"] = "noverbose"
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+import tensorflow as tf
+
+tf.get_logger().setLevel('ERROR')
+tf.autograph.set_verbosity(3)
+
+import numpy as np
+from fawkes.differentiator import FawkesMaskGeneration
+from fawkes.utils import init_gpu, reverse_process_cloaked, \
+    Faces, load_extractor
+
+from fawkes.align_face import aligner
+
+
+def generate_cloak_images(protector, image_X, target_emb=None):
+    cloaked_image_X = protector.compute(image_X, target_emb)
+    return cloaked_image_X
+
+
+IMG_SIZE = 112
+PREPROCESS = 'raw'
+
+
+class Fawkes(object):
+    def __init__(self, gpu, batch_size, mode="low"):
+
+        self.gpu = gpu
+        self.batch_size = batch_size
+        self.mode = mode
+        th, max_step, lr, extractors = self.mode2param(self.mode)
+        self.th = th
+        self.lr = lr
+        self.max_step = max_step
+        if gpu is not None:
+            init_gpu(gpu)
+
+        self.aligner = aligner()
+
+        self.protector = None
+        self.protector_param = None
+        self.feature_extractors_ls = [load_extractor(name) for name in extractors]
+
+    def mode2param(self, mode):
+        if mode == 'low':
+            th = 0.004
+            max_step = 40
+            lr = 25
+            extractors = ["extractor_2"]
+
+        elif mode == 'mid':
+            th = 0.012
+            max_step = 75
+            lr = 20
+            extractors = ["extractor_0", "extractor_2"]
+
+        elif mode == 'high':
+            th = 0.017
+            max_step = 150
+            lr = 15
+            extractors = ["extractor_0", "extractor_2"]
+
+        else:
+            raise Exception("mode must be one of 'min', 'low', 'mid', 'high'")
+        return th, max_step, lr, extractors
+
+    def run_protection(self, image, sd=1e7, batch_size=1, format='png', separate_target=True, debug=False,
+                       no_align=False, maximize=True, save_last_on_failed=True):
+
+        current_param = "-".join([str(x) for x in [self.th, sd, self.lr, self.max_step, batch_size, format,
+                                                   separate_target, debug]])
+
+        faces = Faces(image, self.aligner, verbose=1, no_align=no_align)
+        original_images = faces.cropped_faces
+
+        if len(original_images) == 0:
+            print("No face detected. ")
+            return 2
+        original_images = np.array(original_images)
+
+        if current_param != self.protector_param:
+            self.protector_param = current_param
+            if self.protector is not None:
+                del self.protector
+            if batch_size == -1:
+                batch_size = len(original_images)
+            self.protector = FawkesMaskGeneration(self.feature_extractors_ls,
+                                                  batch_size=batch_size,
+                                                  mimic_img=True,
+                                                  intensity_range=PREPROCESS,
+                                                  initial_const=sd,
+                                                  learning_rate=self.lr,
+                                                  max_iterations=self.max_step,
+                                                  l_threshold=self.th,
+                                                  verbose=debug,
+                                                  maximize=maximize,
+                                                  keep_final=False,
+                                                  image_shape=(IMG_SIZE, IMG_SIZE, 3),
+                                                  loss_method='features',
+                                                  tanh_process=True,
+                                                  save_last_on_failed=save_last_on_failed,
+                                                  )
+        protected_images = generate_cloak_images(self.protector, original_images)
+        faces.cloaked_cropped_faces = protected_images
+
+        final_images, images_without_face = faces.merge_faces(
+            reverse_process_cloaked(protected_images, preprocess=PREPROCESS),
+            reverse_process_cloaked(original_images, preprocess=PREPROCESS))
+
+        if images_without_face:
+            return None
+        else:
+            return [img for img in final_images][0]
+        # for i in range(len(final_images)):
+        #     if i in images_without_face:
+        #         continue
+        #     p_img = final_images[i]
+        #     path = image_paths[i]
+        #     file_name = "{}_cloaked.{}".format(".".join(path.split(".")[:-1]), format)
+        #     dump_image(p_img, file_name, format=format)
+
+        # print("Done!")
+        # return 1
+
+
+def main(*argv):
+    if not argv:
+        argv = list(sys.argv)
+
+    try:
+        import signal
+        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+    except Exception as e:
+        pass
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--directory', '-d', type=str,
+                        help='the directory that contains images to run protection', default='imgs/')
+    parser.add_argument('--gpu', '-g', type=str,
+                        help='the GPU id when using GPU for optimization', default='0')
+    parser.add_argument('--mode', '-m', type=str,
+                        help='cloak generation mode, select from min, low, mid, high. The higher the mode is, '
+                             'the more perturbation added and stronger protection',
+                        default='low')
+    parser.add_argument('--feature-extractor', type=str,
+                        help="name of the feature extractor used for optimization",
+                        default="arcface_extractor_0")
+    parser.add_argument('--th', help='only relevant with mode=custom, DSSIM threshold for perturbation', type=float,
+                        default=0.01)
+    parser.add_argument('--max-step', help='only relevant with mode=custom, number of steps for optimization', type=int,
+                        default=1000)
+    parser.add_argument('--sd', type=int, help='only relevant with mode=custom, penalty number, read more in the paper',
+                        default=1e6)
+    parser.add_argument('--lr', type=float, help='only relevant with mode=custom, learning rate', default=2)
+    parser.add_argument('--batch-size', help="number of images to run optimization together", type=int, default=1)
+    parser.add_argument('--separate_target', help="whether select separate targets for each faces in the directory",
+                        action='store_true')
+    parser.add_argument('--no-align', help="whether to detect and crop faces",
+                        action='store_true')
+    parser.add_argument('--debug', help="turn on debug and copy/paste the stdout when reporting an issue on github",
+                        action='store_true')
+    parser.add_argument('--format', type=str,
+                        help="format of the output image",
+                        default="png")
+
+    args = parser.parse_args(argv[1:])
+
+    assert args.format in ['png', 'jpg', 'jpeg']
+    if args.format == 'jpg':
+        args.format = 'jpeg'
+
+    image_paths = glob.glob(os.path.join(args.directory, "*"))
+    image_paths = [path for path in image_paths if "_cloaked" not in path.split("/")[-1]]
+
+    protector = Fawkes(args.gpu, args.batch_size, mode=args.mode)
+
+    protector.run_protection(image_paths, th=args.th, sd=args.sd, lr=args.lr,
+                             max_step=args.max_step,
+                             batch_size=args.batch_size, format=args.format,
+                             separate_target=args.separate_target, debug=args.debug, no_align=args.no_align)
+
+
+if __name__ == '__main__':
+    main(*sys.argv)
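`mode2param` trades runtime for protection strength: higher modes raise the DSSIM threshold `th`, run more optimization steps, and ensemble two feature extractors instead of one. A minimal end-to-end sketch of the class as committed, assuming the extractor weights can be fetched from the mirror and that `photo.jpg` is a hypothetical path to a face photo:

    from PIL import Image
    from fawkes.protection import Fawkes

    # gpu=None skips init_gpu and runs on CPU; mode selects th/max_step/lr.
    protector = Fawkes(gpu=None, batch_size=1, mode="low")
    img = Image.open("photo.jpg").convert("RGB")
    cloaked = protector.run_protection(img, sd=1e6, batch_size=1, format="png",
                                       separate_target=False, debug=False,
                                       no_align=False)
    # Per the code above: returns the cloaked image array on success,
    # None if merging failed, or 2 if no face was detected.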
fawkes/utils.py ADDED
@@ -0,0 +1,731 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Date    : 2020-05-17
+# @Author  : Shawn Shan (shansixiong@cs.uchicago.edu)
+# @Link    : https://www.shawnshan.com/
+
+
+import errno
+import glob
+import gzip
+import hashlib
+import json
+import os
+import pickle
+import random
+import shutil
+import sys
+import tarfile
+import zipfile
+
+import PIL
+import pkg_resources
+import six
+from keras.utils import Progbar
+from six.moves.urllib.error import HTTPError, URLError
+
+stderr = sys.stderr
+sys.stderr = open(os.devnull, 'w')
+import keras
+
+sys.stderr = stderr
+import keras.backend as K
+import numpy as np
+import tensorflow as tf
+from PIL import Image, ExifTags
+from keras.layers import Dense, Activation
+from keras.models import Model
+from keras.preprocessing import image
+
+from fawkes.align_face import align
+from six.moves.urllib.request import urlopen
+
+if sys.version_info[0] == 2:
+    def urlretrieve(url, filename, reporthook=None, data=None):
+        def chunk_read(response, chunk_size=8192, reporthook=None):
+            content_type = response.info().get('Content-Length')
+            total_size = -1
+            if content_type is not None:
+                total_size = int(content_type.strip())
+            count = 0
+            while True:
+                chunk = response.read(chunk_size)
+                count += 1
+                if reporthook is not None:
+                    reporthook(count, chunk_size, total_size)
+                if chunk:
+                    yield chunk
+                else:
+                    break
+
+        response = urlopen(url, data)
+        with open(filename, 'wb') as fd:
+            for chunk in chunk_read(response, reporthook=reporthook):
+                fd.write(chunk)
+else:
+    from six.moves.urllib.request import urlretrieve
+
+
+def clip_img(X, preprocessing='raw'):
+    X = reverse_preprocess(X, preprocessing)
+    X = np.clip(X, 0.0, 255.0)
+    X = preprocess(X, preprocessing)
+    return X
+
+
+IMG_SIZE = 112
+PREPROCESS = 'raw'
+
+
+def load_image(path):
+    try:
+        img = Image.open(path)
+    except PIL.UnidentifiedImageError:
+        return None
+    except IsADirectoryError:
+        return None
+
+    try:
+        info = img._getexif()
+    except OSError:
+        return None
+
+    if info is not None:
+        for orientation in ExifTags.TAGS.keys():
+            if ExifTags.TAGS[orientation] == 'Orientation':
+                break
+
+        exif = dict(img._getexif().items())
+        if orientation in exif.keys():
+            if exif[orientation] == 3:
+                img = img.rotate(180, expand=True)
+            elif exif[orientation] == 6:
+                img = img.rotate(270, expand=True)
+            elif exif[orientation] == 8:
+                img = img.rotate(90, expand=True)
+            else:
+                pass
+    img = img.convert('RGB')
+    image_array = image.img_to_array(img)
+
+    return image_array
+
+
+class Faces(object):
+    def __init__(self, image, aligner, verbose=1, eval_local=False, preprocessing=True, no_align=False):
+        self.verbose = verbose
+        self.no_align = no_align
+        self.aligner = aligner
+        self.margin = 30
+        self.org_faces = [image]  # single image in a list
+        self.cropped_faces = []
+        self.cropped_faces_shape = []
+        self.cropped_index = []
+        self.start_end_ls = []
+        self.callback_idx = []
+        self.images_without_face = []
+
+        # Processing the single image
+        cur_img = np.array(image)
+
+        if not self.no_align:
+            align_img = align(cur_img, self.aligner)
+            if align_img is None:
+                if self.verbose:
+                    print("Find 0 face(s) in the image")
+                self.images_without_face.append(0)
+                return
+
+            cur_faces = align_img[0]
+        else:
+            cur_faces = [cur_img]
+
+        cur_faces = [face for face in cur_faces if face.shape[0] != 0 and face.shape[1] != 0]
+        cur_shapes = [f.shape[:-1] for f in cur_faces]
+
+        cur_faces_square = []
+        if self.verbose and not self.no_align:
+            print("Find {} face(s) in the image".format(len(cur_faces)))
+        if eval_local:
+            cur_faces = cur_faces[:1]
+
+        for img in cur_faces:
+            if eval_local:
+                base = resize(img, (IMG_SIZE, IMG_SIZE))
+            else:
+                long_size = max([img.shape[1], img.shape[0]]) + self.margin
+
+                base = np.ones((long_size, long_size, 3)) * np.mean(img, axis=(0, 1))
+
+                start1, end1 = get_ends(long_size, img.shape[0])
+                start2, end2 = get_ends(long_size, img.shape[1])
+
+                base[start1:end1, start2:end2, :] = img
+                cur_start_end = (start1, end1, start2, end2)
+                self.start_end_ls.append(cur_start_end)
+
+            cur_faces_square.append(base)
+        cur_faces_square = [resize(f, (IMG_SIZE, IMG_SIZE)) for f in cur_faces_square]
+        self.cropped_faces.extend(cur_faces_square)
+
+        if not self.no_align:
+            cur_index = align_img[1]
+            self.cropped_faces_shape.extend(cur_shapes)
+            self.cropped_index.extend(cur_index[:len(cur_faces_square)])
+            self.callback_idx.extend([0]*len(cur_faces_square))
+
+        if len(self.cropped_faces) == 0:
+            return
+
+        self.cropped_faces = np.array(self.cropped_faces)
+
+        if preprocessing:
+            self.cropped_faces = preprocess(self.cropped_faces, PREPROCESS)
+
+        self.cloaked_cropped_faces = None
+        self.cloaked_faces = np.copy(self.org_faces)
+
+    def get_faces(self):
+        return self.cropped_faces
+
+    def merge_faces(self, protected_images, original_images):
+        if self.no_align:
+            return np.clip(protected_images, 0.0, 255.0), self.images_without_face
+
+        self.cloaked_faces = np.copy(self.org_faces)
+
+        for i in range(len(self.cropped_faces)):
+            cur_protected = protected_images[i]
+            cur_original = original_images[i]
+
+            org_shape = self.cropped_faces_shape[i]
+
+            old_square_shape = max([org_shape[0], org_shape[1]]) + self.margin
+
+            cur_protected = resize(cur_protected, (old_square_shape, old_square_shape))
+            cur_original = resize(cur_original, (old_square_shape, old_square_shape))
+
+            start1, end1, start2, end2 = self.start_end_ls[i]
+
+            reshape_cloak = cur_protected - cur_original
+            reshape_cloak = reshape_cloak[start1:end1, start2:end2, :]
+
+            callback_id = self.callback_idx[i]
+            bb = self.cropped_index[i]
+            self.cloaked_faces[callback_id][bb[0]:bb[2], bb[1]:bb[3], :] += reshape_cloak.astype(np.uint8)
+
+        for i in range(0, len(self.cloaked_faces)):
+            self.cloaked_faces[i] = np.clip(self.cloaked_faces[i], 0.0, 255.0)
+        return self.cloaked_faces, self.images_without_face
+
+
+def get_ends(longsize, window):
+    start = (longsize - window) // 2
+    end = start + window
+    return start, end
+
+
+def dump_dictionary_as_json(dict, outfile):
+    j = json.dumps(dict)
+    with open(outfile, "wb") as f:
+        f.write(j.encode())
+
+
+def load_victim_model(number_classes, teacher_model=None, end2end=False):
+    for l in teacher_model.layers:
+        l.trainable = end2end
+    x = teacher_model.layers[-1].output
+
+    x = Dense(number_classes)(x)
+    x = Activation('softmax', name="act")(x)
+    model = Model(teacher_model.input, x)
+    opt = keras.optimizers.Adadelta()
+    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
+    return model
+
+
+def resize(img, sz):
+    assert np.min(img) >= 0 and np.max(img) <= 255.0
+    from keras.preprocessing import image
+    im_data = image.array_to_img(img).resize((sz[1], sz[0]))
+    im_data = image.img_to_array(im_data)
+    return im_data
+
+
+def init_gpu(gpu):
+    ''' code to initialize gpu in tf2'''
+    if isinstance(gpu, list):
+        gpu_num = ','.join([str(i) for i in gpu])
+    else:
+        gpu_num = str(gpu)
+    if "CUDA_VISIBLE_DEVICES" in os.environ:
+        print('GPU already initiated')
+        return
+    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_num
+    gpus = tf.config.experimental.list_physical_devices('GPU')
+    if gpus:
+        try:
+            tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
+            tf.config.experimental.set_memory_growth(gpus[0], True)
+            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
+            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
+        except RuntimeError as e:
+            print(e)
+
+
+def fix_gpu_memory(mem_fraction=1):
+    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+    tf_config = None
+    if tf.test.is_gpu_available():
+        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
+        tf_config = tf.ConfigProto(gpu_options=gpu_options)
+        tf_config.gpu_options.allow_growth = True
+        tf_config.log_device_placement = False
+    init_op = tf.global_variables_initializer()
+    sess = tf.Session(config=tf_config)
+    sess.run(init_op)
+    K.set_session(sess)
+    return sess
+
+
+def preprocess(X, method):
+    assert method in {'raw', 'imagenet', 'inception', 'mnist'}
+
+    if method == 'raw':
+        pass
+    elif method == 'imagenet':
+        X = imagenet_preprocessing(X)
+    else:
+        raise Exception('unknown method %s' % method)
+
+    return X
+
+
+def reverse_preprocess(X, method):
+    assert method in {'raw', 'imagenet', 'inception', 'mnist'}
+
+    if method == 'raw':
+        pass
+    elif method == 'imagenet':
+        X = imagenet_reverse_preprocessing(X)
+    else:
+        raise Exception('unknown method %s' % method)
+
+    return X
+
+
+def imagenet_preprocessing(x, data_format=None):
+    if data_format is None:
+        data_format = K.image_data_format()
+    assert data_format in ('channels_last', 'channels_first')
+
+    x = np.array(x)
+    if data_format == 'channels_first':
+        # 'RGB'->'BGR'
+        if x.ndim == 3:
+            x = x[::-1, ...]
+        else:
+            x = x[:, ::-1, ...]
+    else:
+        # 'RGB'->'BGR'
+        x = x[..., ::-1]
+
+    mean = [103.939, 116.779, 123.68]
+    std = None
+
+    # Zero-center by mean pixel
+    if data_format == 'channels_first':
+        if x.ndim == 3:
+            x[0, :, :] -= mean[0]
+            x[1, :, :] -= mean[1]
+            x[2, :, :] -= mean[2]
+            if std is not None:
+                x[0, :, :] /= std[0]
+                x[1, :, :] /= std[1]
+                x[2, :, :] /= std[2]
+        else:
+            x[:, 0, :, :] -= mean[0]
+            x[:, 1, :, :] -= mean[1]
+            x[:, 2, :, :] -= mean[2]
+            if std is not None:
+                x[:, 0, :, :] /= std[0]
+                x[:, 1, :, :] /= std[1]
+                x[:, 2, :, :] /= std[2]
+    else:
+        x[..., 0] -= mean[0]
+        x[..., 1] -= mean[1]
+        x[..., 2] -= mean[2]
+        if std is not None:
+            x[..., 0] /= std[0]
+            x[..., 1] /= std[1]
+            x[..., 2] /= std[2]
+
+    return x
+
+
+def imagenet_reverse_preprocessing(x, data_format=None):
+    import keras.backend as K
+    x = np.array(x)
+    if data_format is None:
+        data_format = K.image_data_format()
+    assert data_format in ('channels_last', 'channels_first')
+
+    if data_format == 'channels_first':
+        if x.ndim == 3:
+            # Zero-center by mean pixel
+            x[0, :, :] += 103.939
+            x[1, :, :] += 116.779
+            x[2, :, :] += 123.68
+            # 'BGR'->'RGB'
+            x = x[::-1, :, :]
+        else:
+            x[:, 0, :, :] += 103.939
+            x[:, 1, :, :] += 116.779
+            x[:, 2, :, :] += 123.68
+            x = x[:, ::-1, :, :]
+    else:
+        # Zero-center by mean pixel
+        x[..., 0] += 103.939
+        x[..., 1] += 116.779
+        x[..., 2] += 123.68
+        # 'BGR'->'RGB'
+        x = x[..., ::-1]
+    return x
+
+
+def reverse_process_cloaked(x, preprocess='imagenet'):
+    # x = clip_img(x, preprocess)
+    return reverse_preprocess(x, preprocess)
+
+
+def build_bottleneck_model(model, cut_off):
+    bottleneck_model = Model(model.input, model.get_layer(cut_off).output)
+    bottleneck_model.compile(loss='categorical_crossentropy',
+                             optimizer='adam',
+                             metrics=['accuracy'])
+    return bottleneck_model
+
+
+def load_extractor(name):
+    hash_map = {"extractor_2": "ce703d481db2b83513bbdafa27434703",
+                "extractor_0": "94854151fd9077997d69ceda107f9c6b"}
+    assert name in ["extractor_2", 'extractor_0']
+    model_file = pkg_resources.resource_filename("fawkes", "model/{}.h5".format(name))
+    cur_hash = hash_map[name]
+    model_dir = pkg_resources.resource_filename("fawkes", "model/")
+    os.makedirs(model_dir, exist_ok=True)
+    get_file("{}.h5".format(name), "http://mirror.cs.uchicago.edu/fawkes/files/{}.h5".format(name),
+             cache_dir=model_dir, cache_subdir='', md5_hash=cur_hash)
+
+    model = keras.models.load_model(model_file)
+    model = Extractor(model)
+    return model
+
+
+class Extractor(object):
+    def __init__(self, model):
+        self.model = model
+
+    def predict(self, imgs):
+        imgs = imgs / 255.0
+        embeds = l2_norm(self.model(imgs))
+        return embeds
+
+    def __call__(self, x):
+        return self.predict(x)
+
+
+def get_dataset_path(dataset):
+    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
+    if not os.path.exists(os.path.join(model_dir, "config.json")):
+        raise Exception("Please config the datasets before running protection code. See more in README and config.py.")
+
+    config = json.load(open(os.path.join(model_dir, "config.json"), 'r'))
+    if dataset not in config:
+        raise Exception(
+            "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
+                dataset))
+    return config[dataset]['train_dir'], config[dataset]['test_dir'], config[dataset]['num_classes'], config[dataset][
+        'num_images']
+
+
+def dump_image(x, filename, format="png", scale=False):
+    img = image.array_to_img(x, scale=scale)
+    img.save(filename, format)
+    return
+
+
+def load_embeddings(feature_extractors_names):
+    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
+    for extractor_name in feature_extractors_names:
+        fp = gzip.open(os.path.join(model_dir, "{}_emb.p.gz".format(extractor_name)), 'rb')
+        path2emb = pickle.load(fp)
+        fp.close()
+
+    return path2emb
+
+
+def extractor_ls_predict(feature_extractors_ls, X):
+    feature_ls = []
+    for extractor in feature_extractors_ls:
+        cur_features = extractor.predict(X)
+        feature_ls.append(cur_features)
+    concated_feature_ls = np.concatenate(feature_ls, axis=1)
+    return concated_feature_ls
+
+
+def pairwise_l2_distance(A, B):
+    BT = B.transpose()
+    vecProd = np.dot(A, BT)
+    SqA = A ** 2
+    sumSqA = np.matrix(np.sum(SqA, axis=1))
+    sumSqAEx = np.tile(sumSqA.transpose(), (1, vecProd.shape[1]))
+
+    SqB = B ** 2
+    sumSqB = np.sum(SqB, axis=1)
+    sumSqBEx = np.tile(sumSqB, (vecProd.shape[0], 1))
+    SqED = sumSqBEx + sumSqAEx - 2 * vecProd
+    SqED[SqED < 0] = 0.0
+    ED = np.sqrt(SqED)
+    return ED
+
+
+def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, metric='l2'):
+    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
+
+    original_feature_x = extractor_ls_predict(feature_extractors_ls, imgs)
+
+    path2emb = load_embeddings(feature_extractors_names)
+
+    items = list([(k, v) for k, v in path2emb.items()])
+    paths = [p[0] for p in items]
+    embs = [p[1] for p in items]
+    embs = np.array(embs)
+
+    pair_dist = pairwise_l2_distance(original_feature_x, embs)
+    pair_dist = np.array(pair_dist)
+
+    max_sum = np.min(pair_dist, axis=0)
+    max_id_ls = np.argsort(max_sum)[::-1]
+
+    max_id = random.choice(max_id_ls[:20])
+
+    target_data_id = paths[int(max_id)]
+    print("target ID: {}".format(target_data_id))
+
+    image_dir = os.path.join(model_dir, "target_data/{}".format(target_data_id))
+
+    os.makedirs(os.path.join(model_dir, "target_data"), exist_ok=True)
+    os.makedirs(image_dir, exist_ok=True)
+    for i in range(10):
+        if os.path.exists(os.path.join(model_dir, "target_data/{}/{}.jpg".format(target_data_id, i))):
+            continue
+        try:
+            get_file("{}.jpg".format(i),
+                     "http://mirror.cs.uchicago.edu/fawkes/files/target_data/{}/{}.jpg".format(target_data_id, i),
+                     cache_dir=model_dir, cache_subdir='target_data/{}/'.format(target_data_id))
+        except Exception:
+            pass
+
+    image_paths = glob.glob(image_dir + "/*.jpg")
+
+    target_images = [image.img_to_array(image.load_img(cur_path)) for cur_path in
+                     image_paths]
+
+    target_images = np.array([resize(x, (IMG_SIZE, IMG_SIZE)) for x in target_images])
+    target_images = preprocess(target_images, PREPROCESS)
+
+    target_images = list(target_images)
+    while len(target_images) < len(imgs):
+        target_images += target_images
+
+    target_images = random.sample(target_images, len(imgs))
+    return np.array(target_images)
+
+
+def l2_norm(x, axis=1):
+    """l2 norm"""
+    norm = tf.norm(x, axis=axis, keepdims=True)
+    output = x / norm
+    return output
+
+
+""" TensorFlow implementation get_file
+https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/utils/data_utils.py#L168-L297
+"""
+
+
+def get_file(fname,
+             origin,
+             untar=False,
+             md5_hash=None,
+             file_hash=None,
+             cache_subdir='datasets',
+             hash_algorithm='auto',
+             extract=False,
+             archive_format='auto',
+             cache_dir=None):
+    if cache_dir is None:
+        cache_dir = os.path.join(os.path.expanduser('~'), '.keras')
+    if md5_hash is not None and file_hash is None:
+        file_hash = md5_hash
+        hash_algorithm = 'md5'
+    datadir_base = os.path.expanduser(cache_dir)
+    if not os.access(datadir_base, os.W_OK):
+        datadir_base = os.path.join('/tmp', '.keras')
+    datadir = os.path.join(datadir_base, cache_subdir)
+    _makedirs_exist_ok(datadir)
+
+    # fname = path_to_string(fname)
+
+    if untar:
+        untar_fpath = os.path.join(datadir, fname)
+        fpath = untar_fpath + '.tar.gz'
+    else:
+        fpath = os.path.join(datadir, fname)
+
+    download = False
+    if os.path.exists(fpath):
+        # File found; verify integrity if a hash was provided.
+        if file_hash is not None:
+            if not validate_file(fpath, file_hash, algorithm=hash_algorithm):
+                print('A local file was found, but it seems to be '
+                      'incomplete or outdated because the ' + hash_algorithm +
+                      ' file hash does not match the original value of ' + file_hash +
+                      ' so we will re-download the data.')
+                download = True
+    else:
+        download = True
+
+    if download:
+        print('Downloading data from', origin)
+
+        class ProgressTracker(object):
+            # Maintain progbar for the lifetime of download.
+            # This design was chosen for Python 2.7 compatibility.
+            progbar = None
+
+        def dl_progress(count, block_size, total_size):
+            if ProgressTracker.progbar is None:
+                if total_size == -1:
+                    total_size = None
+                ProgressTracker.progbar = Progbar(total_size)
+            else:
+                ProgressTracker.progbar.update(count * block_size)
+
+        error_msg = 'URL fetch failure on {}: {} -- {}'
+        try:
+            try:
+                urlretrieve(origin, fpath, dl_progress)
+            except HTTPError as e:
+                raise Exception(error_msg.format(origin, e.code, e.msg))
+            except URLError as e:
+                raise Exception(error_msg.format(origin, e.errno, e.reason))
+        except (Exception, KeyboardInterrupt) as e:
+            if os.path.exists(fpath):
+                os.remove(fpath)
+            raise
+        ProgressTracker.progbar = None
+
+    if untar:
+        if not os.path.exists(untar_fpath):
+            _extract_archive(fpath, datadir, archive_format='tar')
+        return untar_fpath
+
+    if extract:
+        _extract_archive(fpath, datadir, archive_format)
+
+    return fpath
+
+
+def _extract_archive(file_path, path='.', archive_format='auto'):
+    if archive_format is None:
+        return False
+    if archive_format == 'auto':
+        archive_format = ['tar', 'zip']
+    if isinstance(archive_format, six.string_types):
+        archive_format = [archive_format]
+
+    for archive_type in archive_format:
+        if archive_type == 'tar':
+            open_fn = tarfile.open
+            is_match_fn = tarfile.is_tarfile
+        if archive_type == 'zip':
+            open_fn = zipfile.ZipFile
+            is_match_fn = zipfile.is_zipfile
+
+        if is_match_fn(file_path):
+            with open_fn(file_path) as archive:
+                try:
+                    archive.extractall(path)
+                except (tarfile.TarError, RuntimeError, KeyboardInterrupt):
+                    if os.path.exists(path):
+                        if os.path.isfile(path):
+                            os.remove(path)
+                        else:
+                            shutil.rmtree(path)
+                    raise
+            return True
+    return False
+
+
+def _makedirs_exist_ok(datadir):
+    if six.PY2:
+        # Python 2 doesn't have the exist_ok arg, so we try-except here.
+        try:
+            os.makedirs(datadir)
+        except OSError as e:
+            if e.errno != errno.EEXIST:
+                raise
+    else:
+        os.makedirs(datadir, exist_ok=True)  # pylint: disable=unexpected-keyword-arg
+
+
+def validate_file(fpath, file_hash, algorithm='auto', chunk_size=65535):
+    """Validates a file against a sha256 or md5 hash.
+    Arguments:
+        fpath: path to the file being validated
+        file_hash: The expected hash string of the file.
+            The sha256 and md5 hash algorithms are both supported.
+        algorithm: Hash algorithm, one of 'auto', 'sha256', or 'md5'.
+            The default 'auto' detects the hash algorithm in use.
+        chunk_size: Bytes to read at a time, important for large files.
+    Returns:
+        Whether the file is valid
+    """
+    if (algorithm == 'sha256') or (algorithm == 'auto' and len(file_hash) == 64):
+        hasher = 'sha256'
+    else:
+        hasher = 'md5'
+
+    if str(_hash_file(fpath, hasher, chunk_size)) == str(file_hash):
+        return True
+    else:
+        return False
+
+
+def _hash_file(fpath, algorithm='sha256', chunk_size=65535):
+    """Calculates a file sha256 or md5 hash.
+    Example:
+    ```python
+        _hash_file('/path/to/file.zip')
+        'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
+    ```
+    Arguments:
+        fpath: path to the file being validated
+        algorithm: hash algorithm, one of `'auto'`, `'sha256'`, or `'md5'`.
+            The default `'auto'` detects the hash algorithm in use.
+        chunk_size: Bytes to read at a time, important for large files.
+    Returns:
+        The file hash
+    """
+    if (algorithm == 'sha256') or (algorithm == 'auto' and len(hash) == 64):
+        hasher = hashlib.sha256()
+    else:
+        hasher = hashlib.md5()
+
+    with open(fpath, 'rb') as fpath_file:
+        for chunk in iter(lambda: fpath_file.read(chunk_size), b''):
+            hasher.update(chunk)
+
+    return hasher.hexdigest()
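`pairwise_l2_distance` above uses the standard expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b to get all source-to-candidate distances from one matrix product, which is how `select_target_label` ranks target identities. A quick sanity sketch of the same expansion against the brute-force definition (this re-derives the helper with plain ndarrays instead of np.matrix purely for the check; it is not the committed function):

    import numpy as np

    def pairwise_l2(A, B):
        # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, clamped at 0 before sqrt
        vec_prod = np.dot(A, B.T)
        sum_sq_a = np.sum(A ** 2, axis=1)[:, None]
        sum_sq_b = np.sum(B ** 2, axis=1)[None, :]
        sq_ed = np.maximum(sum_sq_a + sum_sq_b - 2 * vec_prod, 0.0)
        return np.sqrt(sq_ed)

    A = np.random.randn(4, 8)
    B = np.random.randn(5, 8)
    brute = np.linalg.norm(A[:, None, :] - B[None, :, :], axis=2)
    assert np.allclose(pairwise_l2(A, B), brute)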