Kieran Fraser committed on
Commit 82d0451 · 1 Parent(s): 25413fe

Update to evasion

Files changed (2)
  1. app.py +226 -419
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,5 +1,5 @@
1
  '''
2
- ART-JATIC Gradio Example App
3
 
4
  To run:
5
  - clone the repository
@@ -25,85 +25,109 @@ from art.attacks.poisoning.perturbations import insert_image
25
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
26
 
27
  css = """
28
  .input-image { margin: auto !important }
29
  .plot-padding { padding: 20px; }
30
  """
31
32
  def clf_evasion_evaluate(*args):
33
  '''
34
  Run a classification task evaluation
35
  '''
36
  attack = args[0]
37
- model_type = args[1]
38
- model_url = args[2]
39
- model_channels = args[3]
40
- model_height = args[4]
41
- model_width = args[5]
42
- model_classes = args[6]
43
- model_clip = args[7]
44
- model_upsample = args[8]
45
- attack_max_iter = args[9]
46
- attack_eps = args[10]
47
- attack_eps_steps = args[11]
48
- x_location = args[12]
49
- y_location = args[13]
50
- patch_height = args[14]
51
- patch_width = args[15]
52
- data_type = args[-1]
53
 
54
- if model_type == "Example":
55
- model = transformers.AutoModelForImageClassification.from_pretrained(
56
- 'facebook/deit-tiny-distilled-patch16-224',
57
- ignore_mismatched_sizes=True,
58
- num_labels=10
59
- )
60
- upsampler = torch.nn.Upsample(scale_factor=7, mode='nearest')
61
- optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
62
- loss_fn = torch.nn.CrossEntropyLoss()
63
-
64
- hf_model = HuggingFaceClassifierPyTorch(
65
- model=model,
66
- loss=loss_fn,
67
- optimizer=optimizer,
68
- input_shape=(3, 32, 32),
69
- nb_classes=10,
70
- clip_values=(0, 1),
71
- processor=upsampler
72
- )
73
- model_checkpoint_path = './state_dicts/deit_cifar_base_model.pt'
74
- hf_model.model.load_state_dict(torch.load(model_checkpoint_path, map_location=device))
75
-
76
- if data_type == "Example":
77
- (x_train, y_train), (_, _), _, _ = load_dataset('cifar10')
78
- x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32)
79
- y_train = np.argmax(y_train, axis=1)
80
 
81
- classes = np.unique(y_train)
82
- samples_per_class = 1
83
 
84
- x_subset = []
85
- y_subset = []
86
 
87
- for c in classes:
88
- indices = y_train == c
89
- x_subset.append(x_train[indices][:samples_per_class])
90
- y_subset.append(y_train[indices][:samples_per_class])
91
 
92
- x_subset = np.concatenate(x_subset)
93
- y_subset = np.concatenate(y_subset)
94
-
95
- label_names = [
96
- 'airplane',
97
- 'automobile',
98
- 'bird',
99
- 'cat',
100
- 'deer',
101
- 'dog',
102
- 'frog',
103
- 'horse',
104
- 'ship',
105
- 'truck',
106
- ]
107
 
108
  outputs = hf_model.predict(x_subset)
109
  clean_preds = np.argmax(outputs, axis=1)
@@ -124,7 +148,11 @@ def clf_evasion_evaluate(*args):
124
  for i, im in enumerate(x_adv):
125
  adv_gallery_out.append(( im.transpose(1,2,0), label_names[np.argmax(outputs[i])] ))
126
 
127
- delta = ((x_subset - x_adv) + 8/255) * 10
128
  delta_gallery_out = delta.transpose(0, 2, 3, 1)
129
 
130
  if attack == "Adversarial Patch":
@@ -150,132 +178,8 @@ def clf_evasion_evaluate(*args):
150
  adv_gallery_out.append(( im.transpose(1,2,0), label_names[np.argmax(outputs[i])] ))
151
 
152
  delta_gallery_out = np.expand_dims(patch, 0).transpose(0,2,3,1)
153
-
154
- return benign_gallery_out, adv_gallery_out, delta_gallery_out, clean_acc, adv_acc
155
-
156
- def clf_poison_evaluate(*args):
157
-
158
- attack = args[0]
159
- model_type = args[1]
160
- trigger_image = args[2]
161
- target_class = args[3]
162
- data_type = args[-1]
163
-
164
-
165
- if model_type == "Example":
166
- model = transformers.AutoModelForImageClassification.from_pretrained(
167
- 'facebook/deit-tiny-distilled-patch16-224',
168
- ignore_mismatched_sizes=True,
169
- num_labels=10
170
- )
171
- optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
172
- loss_fn = torch.nn.CrossEntropyLoss()
173
-
174
- poison_hf_model = HuggingFaceClassifierPyTorch(
175
- model=model,
176
- loss=loss_fn,
177
- optimizer=optimizer,
178
- input_shape=(3, 224, 224),
179
- nb_classes=10,
180
- clip_values=(0, 1),
181
- )
182
-
183
- if data_type == "Example":
184
- import torchvision
185
- transform = torchvision.transforms.Compose([
186
- torchvision.transforms.Resize((224, 224)),
187
- torchvision.transforms.ToTensor(),
188
- ])
189
- train_dataset = torchvision.datasets.ImageFolder(root="./data/imagenette2-320/train", transform=transform)
190
- labels = np.asarray(train_dataset.targets)
191
- classes = np.unique(labels)
192
- samples_per_class = 100
193
-
194
- x_subset = []
195
- y_subset = []
196
-
197
- for c in classes:
198
- indices = np.where(labels == c)[0][:samples_per_class]
199
- for i in indices:
200
- x_subset.append(train_dataset[i][0])
201
- y_subset.append(train_dataset[i][1])
202
-
203
- x_subset = np.stack(x_subset)
204
- y_subset = np.asarray(y_subset)
205
- label_names = [
206
- 'fish',
207
- 'dog',
208
- 'cassette player',
209
- 'chainsaw',
210
- 'church',
211
- 'french horn',
212
- 'garbage truck',
213
- 'gas pump',
214
- 'golf ball',
215
- 'parachutte',
216
- ]
217
-
218
- if attack == "Backdoor":
219
- from PIL import Image
220
- im = Image.fromarray(trigger_image)
221
- im.save("./tmp.png")
222
- def poison_func(x):
223
- return insert_image(
224
- x,
225
- backdoor_path='./tmp.png',
226
- channels_first=True,
227
- random=False,
228
- x_shift=0,
229
- y_shift=0,
230
- size=(32, 32),
231
- mode='RGB',
232
- blend=0.8
233
- )
234
- backdoor = PoisoningAttackBackdoor(poison_func)
235
- source_class = 0
236
- target_class = label_names.index(target_class)
237
- poison_percent = 0.5
238
-
239
- x_poison = np.copy(x_subset)
240
- y_poison = np.copy(y_subset)
241
- is_poison = np.zeros(len(x_subset)).astype(bool)
242
-
243
- indices = np.where(y_subset == source_class)[0]
244
- num_poison = int(poison_percent * len(indices))
245
-
246
- for i in indices[:num_poison]:
247
- x_poison[i], _ = backdoor.poison(x_poison[i], [])
248
- y_poison[i] = target_class
249
- is_poison[i] = True
250
-
251
- poison_indices = np.where(is_poison)[0]
252
- poison_hf_model.fit(x_poison, y_poison, nb_epochs=2)
253
-
254
- clean_x = x_poison[~is_poison]
255
- clean_y = y_poison[~is_poison]
256
-
257
- outputs = poison_hf_model.predict(clean_x)
258
- clean_preds = np.argmax(outputs, axis=1)
259
- clean_acc = np.mean(clean_preds == clean_y)
260
-
261
- clean_out = []
262
- for i, im in enumerate(clean_x):
263
- clean_out.append( (im.transpose(1,2,0), label_names[clean_preds[i]]) )
264
-
265
- poison_x = x_poison[is_poison]
266
- poison_y = y_poison[is_poison]
267
-
268
- outputs = poison_hf_model.predict(poison_x)
269
- poison_preds = np.argmax(outputs, axis=1)
270
- poison_acc = np.mean(poison_preds == poison_y)
271
-
272
- poison_out = []
273
- for i, im in enumerate(poison_x):
274
- poison_out.append( (im.transpose(1,2,0), label_names[poison_preds[i]]) )
275
-
276
-
277
- return clean_out, poison_out, clean_acc, poison_acc
278
 
 
279
 
280
  def show_params(type):
281
  '''
@@ -283,254 +187,157 @@ def show_params(type):
283
  '''
284
  if type!="Example":
285
  return gr.Column(visible=True)
286
- return gr.Column(visible=False)
287
-
288
- def run_inference(*args):
289
- model_type = args[0]
290
- model_url = args[1]
291
- model_channels = args[2]
292
- model_height = args[3]
293
- model_width = args[4]
294
- model_classes = args[5]
295
- model_clip = args[6]
296
- model_upsample = args[7]
297
- data_type = args[8]
298
-
299
- if model_type == "Example":
300
- model = transformers.AutoModelForImageClassification.from_pretrained(
301
- 'facebook/deit-tiny-distilled-patch16-224',
302
- ignore_mismatched_sizes=True,
303
- num_labels=10
304
- )
305
- upsampler = torch.nn.Upsample(scale_factor=7, mode='nearest')
306
- optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
307
- loss_fn = torch.nn.CrossEntropyLoss()
308
-
309
- hf_model = HuggingFaceClassifierPyTorch(
310
- model=model,
311
- loss=loss_fn,
312
- optimizer=optimizer,
313
- input_shape=(3, 32, 32),
314
- nb_classes=10,
315
- clip_values=(0, 1),
316
- processor=upsampler
317
- )
318
- model_checkpoint_path = './state_dicts/deit_cifar_base_model.pt'
319
- hf_model.model.load_state_dict(torch.load(model_checkpoint_path, map_location=device))
320
-
321
- if data_type == "Example":
322
- (x_train, y_train), (_, _), _, _ = load_dataset('cifar10')
323
- x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32)
324
- y_train = np.argmax(y_train, axis=1)
325
-
326
- classes = np.unique(y_train)
327
- samples_per_class = 5
328
-
329
- x_subset = []
330
- y_subset = []
331
-
332
- for c in classes:
333
- indices = y_train == c
334
- x_subset.append(x_train[indices][:samples_per_class])
335
- y_subset.append(y_train[indices][:samples_per_class])
336
-
337
- x_subset = np.concatenate(x_subset)
338
- y_subset = np.concatenate(y_subset)
339
-
340
- label_names = [
341
- 'airplane',
342
- 'automobile',
343
- 'bird',
344
- 'cat',
345
- 'deer',
346
- 'dog',
347
- 'frog',
348
- 'horse',
349
- 'ship',
350
- 'truck',
351
- ]
352
-
353
- outputs = hf_model.predict(x_subset)
354
- clean_preds = np.argmax(outputs, axis=1)
355
- clean_acc = np.mean(clean_preds == y_subset)
356
- gallery_out = []
357
- for i, im in enumerate(x_subset):
358
- gallery_out.append(( im.transpose(1,2,0), label_names[np.argmax(outputs[i])] ))
359
-
360
- return gallery_out, clean_acc
361
-
362
-
363
 
364
  # e.g. To use a local alternative theme: carbon_theme = Carbon()
365
  carbon_theme = Carbon()
366
- with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
367
  import art
368
  text = art.__version__
369
 
370
  with gr.Row():
371
- with gr.Column(scale=1):
372
  gr.Image(value="./art_lfai.png", show_label=False, show_download_button=False, width=100, show_share_button=False)
373
- with gr.Column(scale=20):
374
- gr.Markdown(f"<h1>Red-teaming HuggingFace with ART (v{text})</h1>", elem_classes="plot-padding")
375
 
376
 
377
- gr.Markdown('''This app guides you through a common workflow for assessing the robustness
378
- of HuggingFace models using standard datasets and state-of-the-art adversarial attacks
379
- found within the Adversarial Robustness Toolbox (ART).<br/><br/>Follow the instructions in each
380
- step below to carry out your own evaluation and determine the risks associated with using
381
- some of your favorite models! <b>#redteaming</b> <b>#trustworthyAI</b>''')
382
 
383
- # Model and Dataset Selection
384
- with gr.Accordion("1. Model selection", open=False):
385
-
386
- gr.Markdown("Select a Hugging Face model to launch an adversarial attack against.")
387
- model_type = gr.Radio(label="Hugging Face Model", choices=["Example", "Other"], value="Example")
388
- with gr.Column(visible=False) as other_model:
389
- gr.Markdown("Coming soon.")
390
- model_url = gr.Text(label="Model URL",
391
- placeholder="e.g. facebook/deit-tiny-distilled-patch16-224",
392
- value='facebook/deit-tiny-distilled-patch16-224', visible=False)
393
- model_input_channels = gr.Text(label="Input channels", value=3, visible=False)
394
- model_input_height = gr.Text(label="Input height", value=32, visible=False)
395
- model_input_width = gr.Text(label="Input width", value=32, visible=False)
396
- model_num_classes = gr.Text(label="Number of classes", value=10, visible=False)
397
- model_clip_values = gr.Radio(label="Clip values", choices=[1, 255], value=1, visible=False)
398
- model_upsample_scaling = gr.Slider(label="Upsample scale factor", minimum=1, maximum=10, value=7, visible=False)
399
-
400
- model_type.change(show_params, model_type, other_model)
401
-
402
- with gr.Accordion("2. Data selection", open=False):
403
- gr.Markdown("This section enables you to select a dataset for evaluation or upload your own image.")
404
- data_type = gr.Radio(label="Hugging Face dataset", choices=["Example", "URL", "Local"], value="Example")
405
- with gr.Column(visible=False) as other_dataset:
406
- gr.Markdown("Coming soon.")
407
- data_type.change(show_params, data_type, other_dataset)
408
 
409
- with gr.Accordion("3. Model inference", open=False):
410
-
411
- with gr.Row():
412
- with gr.Column(scale=1):
413
- preds_gallery = gr.Gallery(label="Predictions", preview=False, show_download_button=True)
414
- with gr.Column(scale=2):
415
- clean_accuracy = gr.Number(label="Clean accuracy",
416
- info="The accuracy achieved by the model in normal (non-adversarial) conditions.")
417
- bt_run_inference = gr.Button("Run inference")
418
- bt_clear = gr.ClearButton(components=[preds_gallery, clean_accuracy])
419
 
420
- bt_run_inference.click(run_inference, inputs=[model_type, model_url, model_input_channels, model_input_height, model_input_width,
421
- model_num_classes, model_clip_values, model_upsample_scaling, data_type],
422
- outputs=[preds_gallery, clean_accuracy])
423
 
424
- # Attack Selection
425
- with gr.Accordion("4. Run attack", open=False):
426
 
427
- gr.Markdown("In this section you can select the type of adversarial attack you wish to deploy against your selected model.")
428
-
429
- with gr.Accordion("Evasion", open=False):
430
- gr.Markdown("Evasion attacks are deployed to cause a model to incorrectly classify or detect items/objects in an image.")
431
 
432
- with gr.Accordion("Projected Gradient Descent", open=False):
433
- gr.Markdown("This attack uses PGD to identify adversarial examples.")
434
-
435
- with gr.Row():
436
-
437
- with gr.Column(scale=1):
438
- attack = gr.Textbox(visible=True, value="PGD", label="Attack", interactive=False)
439
- max_iter = gr.Slider(minimum=1, maximum=1000, label="Max iterations", value=10)
440
- eps = gr.Slider(minimum=0.0001, maximum=1, label="Epslion", value=8/255)
441
- eps_steps = gr.Slider(minimum=0.0001, maximum=1, label="Epsilon steps", value=1/255)
442
- bt_eval_pgd = gr.Button("Evaluate")
443
-
444
- # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
445
- with gr.Column(scale=3):
446
- with gr.Row():
447
- with gr.Column():
448
- original_gallery = gr.Gallery(label="Original", preview=False, show_download_button=True)
449
- benign_output = gr.Label(num_top_classes=3, visible=False)
450
- clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
451
- quality_plot = gr.LinePlot(label="Gradient Quality", x='iteration', y='value', color='metric',
452
- x_title='Iteration', y_title='Avg in Gradients (%)',
453
- caption="""Illustrates the average percent of zero, infinity
454
- or NaN gradients identified in images
455
- across all batches.""", elem_classes="plot-padding", visible=False)
456
-
457
- with gr.Column():
458
- adversarial_gallery = gr.Gallery(label="Adversarial", preview=False, show_download_button=True)
459
- adversarial_output = gr.Label(num_top_classes=3, visible=False)
460
- robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
461
-
462
- with gr.Column():
463
- delta_gallery = gr.Gallery(label="Added perturbation", preview=False, show_download_button=True)
464
-
465
- bt_eval_pgd.click(clf_evasion_evaluate, inputs=[attack, model_type, model_url, model_input_channels, model_input_height, model_input_width,
466
- model_num_classes, model_clip_values, model_upsample_scaling,
467
- max_iter, eps, eps_steps, attack, attack, attack, attack, data_type],
468
- outputs=[original_gallery, adversarial_gallery, delta_gallery, clean_accuracy,
469
- robust_accuracy])
470
-
471
- with gr.Accordion("Adversarial Patch", open=False):
472
- gr.Markdown("This attack crafts an adversarial patch that facilitates evasion.")
473
 
474
- with gr.Row():
475
-
476
- with gr.Column(scale=1):
477
- attack = gr.Textbox(visible=True, value="Adversarial Patch", label="Attack", interactive=False)
478
- max_iter = gr.Slider(minimum=1, maximum=1000, label="Max iterations", value=10)
479
- x_location = gr.Slider(minimum=1, maximum=32, label="Location (x)", value=1)
480
- y_location = gr.Slider(minimum=1, maximum=32, label="Location (y)", value=1)
481
- patch_height = gr.Slider(minimum=1, maximum=32, label="Patch height", value=12)
482
- patch_width = gr.Slider(minimum=1, maximum=32, label="Patch width", value=12)
483
- eval_btn_patch = gr.Button("Evaluate")
484
 
485
- # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
486
- with gr.Column(scale=3):
487
- with gr.Row():
488
- with gr.Column():
489
- original_gallery = gr.Gallery(label="Original", preview=False, show_download_button=True)
490
- clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
491
-
492
- with gr.Column():
493
- adversarial_gallery = gr.Gallery(label="Adversarial", preview=False, show_download_button=True)
494
- robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
495
-
496
- with gr.Column():
497
- delta_gallery = gr.Gallery(label="Patches", preview=False, show_download_button=True)
498
-
499
- eval_btn_patch.click(clf_evasion_evaluate, inputs=[attack, model_type, model_url, model_input_channels, model_input_height, model_input_width,
500
- model_num_classes, model_clip_values, model_upsample_scaling,
501
- max_iter, eps, eps_steps, x_location, y_location, patch_height, patch_width, data_type],
502
- outputs=[original_gallery, adversarial_gallery, delta_gallery, clean_accuracy,
503
- robust_accuracy])
504
-
505
- with gr.Accordion("Poisoning", open=False):
506
-
507
- with gr.Accordion("Backdoor"):
508
 
509
- with gr.Row():
510
- with gr.Column(scale=1):
511
- attack = gr.Textbox(visible=True, value="Backdoor", label="Attack", interactive=False)
512
- target_class = gr.Radio(label="Target class", info="The class you wish to force the model to predict.",
513
- choices=['dog',
514
- 'cassette player',
515
- 'chainsaw',
516
- 'church',
517
- 'french horn',
518
- 'garbage truck',
519
- 'gas pump',
520
- 'golf ball',
521
- 'parachutte',], value='dog')
522
- trigger_image = gr.Image(label="Trigger Image", value="./baby-on-board.png")
523
- eval_btn_patch = gr.Button("Evaluate")
524
- with gr.Column(scale=2):
525
- clean_gallery = gr.Gallery(label="Clean", preview=False, show_download_button=True)
526
  clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
527
- with gr.Column(scale=2):
528
- poison_gallery = gr.Gallery(label="Poisoned", preview=False, show_download_button=True)
529
- poison_success = gr.Number(label="Poison Success", precision=2)
530
 
531
- eval_btn_patch.click(clf_poison_evaluate, inputs=[attack, model_type, trigger_image, target_class, data_type],
532
- outputs=[clean_gallery, poison_gallery, clean_accuracy, poison_success])
533
-
534
  if __name__ == "__main__":
535
 
536
  # For development
 
1
  '''
2
+ ART Gradio Example App [Evasion]
3
 
4
  To run:
5
  - clone the repository
 
25
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
26
 
27
  css = """
28
+ :root {
29
+ --text-md: 20px !important;
30
+ --text-sm: 18px !important;
31
+ }
32
  .input-image { margin: auto !important }
33
  .plot-padding { padding: 20px; }
34
+ .eta-bar.svelte-1occ011.svelte-1occ011 {
35
+ background: #ccccff !important;
36
+ }
37
+ .center-text { text-align: center !important }
38
+ .larger-gap { gap: 100px !important; }
39
+ .symbols { text-align: center !important; margin: auto !important; }
40
+
41
+ div.svelte-15lo0d8>*, div.svelte-15lo0d8>.form > * {
42
+ min-width: 0px !important;
43
+ }
44
  """
45
 
46
+ def sample_CIFAR10():
47
+ label_names = [
48
+ 'airplane',
49
+ 'automobile',
50
+ 'bird',
51
+ 'cat',
52
+ 'deer',
53
+ 'dog',
54
+ 'frog',
55
+ 'horse',
56
+ 'ship',
57
+ 'truck',
58
+ ]
59
+ (x_train, y_train), (_, _), _, _ = load_dataset('cifar10')
60
+ x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32)
61
+ y_train = np.argmax(y_train, axis=1)
62
+ gallery_out = []
63
+ for i, im in enumerate(x_train[:10]):
64
+ gallery_out.append((im.transpose(1,2,0), label_names[y_train[i]]))
65
+ return gallery_out
66
+
67
  def clf_evasion_evaluate(*args):
68
  '''
69
  Run a classification task evaluation
70
  '''
71
  attack = args[0]
72
+ attack_max_iter = args[1]
73
+ attack_eps = args[2]
74
+ attack_eps_steps = args[3]
75
+ x_location = args[4]
76
+ y_location = args[5]
77
+ patch_height = args[6]
78
+ patch_width = args[7]
79
 
80
+ model = transformers.AutoModelForImageClassification.from_pretrained(
81
+ 'facebook/deit-tiny-distilled-patch16-224',
82
+ ignore_mismatched_sizes=True,
83
+ num_labels=10
84
+ )
85
+ upsampler = torch.nn.Upsample(scale_factor=7, mode='nearest')
86
+ optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
87
+ loss_fn = torch.nn.CrossEntropyLoss()
88
+
89
+ hf_model = HuggingFaceClassifierPyTorch(
90
+ model=model,
91
+ loss=loss_fn,
92
+ optimizer=optimizer,
93
+ input_shape=(3, 32, 32),
94
+ nb_classes=10,
95
+ clip_values=(0, 1),
96
+ processor=upsampler
97
+ )
98
+ model_checkpoint_path = './state_dicts/deit_cifar_base_model.pt'
99
+ hf_model.model.load_state_dict(torch.load(model_checkpoint_path, map_location=device))
100
+
101
+ (x_train, y_train), (_, _), _, _ = load_dataset('cifar10')
102
+ x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32)
103
+ y_train = np.argmax(y_train, axis=1)
104
 
105
+ classes = np.unique(y_train)
106
+ samples_per_class = 1
107
 
108
+ x_subset = []
109
+ y_subset = []
110
 
111
+ for c in classes:
112
+ indices = y_train == c
113
+ x_subset.append(x_train[indices][:samples_per_class])
114
+ y_subset.append(y_train[indices][:samples_per_class])
115
 
116
+ x_subset = np.concatenate(x_subset)
117
+ y_subset = np.concatenate(y_subset)
118
+
119
+ label_names = [
120
+ 'airplane',
121
+ 'automobile',
122
+ 'bird',
123
+ 'cat',
124
+ 'deer',
125
+ 'dog',
126
+ 'frog',
127
+ 'horse',
128
+ 'ship',
129
+ 'truck',
130
+ ]
131
 
132
  outputs = hf_model.predict(x_subset)
133
  clean_preds = np.argmax(outputs, axis=1)
 
148
  for i, im in enumerate(x_adv):
149
  adv_gallery_out.append(( im.transpose(1,2,0), label_names[np.argmax(outputs[i])] ))
150
 
151
+ delta = ((x_subset - x_adv) + attack_eps) # * 5  # shift to a non-negative range (optionally scaled) to visualise the perturbations
152
+ delta[delta<0] = 0
153
+ '''if delta.max()>1:
154
+ delta = (delta-np.min(delta))/(np.max(delta)-np.min(delta))'''
155
+ delta[delta>1] = 1
156
  delta_gallery_out = delta.transpose(0, 2, 3, 1)
157
 
158
  if attack == "Adversarial Patch":
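The delta lines added in the hunk above shift the signed perturbation by epsilon and clip it into [0, 1] so Gradio can render it as an image. A minimal equivalent sketch (x_subset, x_adv and attack_eps are the names used in the surrounding function; np is the NumPy import at the top of app.py):

```python
# Sketch only: equivalent form of the perturbation visualisation above.
# Shift the signed perturbation into a non-negative range and clip to [0, 1],
# which matches the two explicit masks in the diff, then convert NCHW -> NHWC.
delta = (x_subset - x_adv) + attack_eps
delta = np.clip(delta, 0.0, 1.0)
delta_gallery_out = delta.transpose(0, 2, 3, 1)
```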
 
178
  adv_gallery_out.append(( im.transpose(1,2,0), label_names[np.argmax(outputs[i])] ))
179
 
180
  delta_gallery_out = np.expand_dims(patch, 0).transpose(0,2,3,1)
 
181
 
182
+ return benign_gallery_out, adv_gallery_out, delta_gallery_out, clean_acc, adv_acc
183
 
184
  def show_params(type):
185
  '''
 
187
  '''
188
  if type!="Example":
189
  return gr.Column(visible=True)
190
+ return gr.Column(visible=False)
191
 
192
  # e.g. To use a local alternative theme: carbon_theme = Carbon()
193
  carbon_theme = Carbon()
194
+ with gr.Blocks(css=css, theme='Tshackelton/IBMPlex-DenseReadable') as demo:
195
  import art
196
  text = art.__version__
197
 
198
  with gr.Row():
199
+ with gr.Column(scale=1,):
200
  gr.Image(value="./art_lfai.png", show_label=False, show_download_button=False, width=100, show_share_button=False)
201
+ with gr.Column(scale=2):
202
+ gr.Markdown(f"<h1>⚔️ Red-teaming HuggingFace with ART [Evasion]</h1>", elem_classes="plot-padding")
203
 
204
 
205
+ gr.Markdown('''<p style="font-size: 20px; text-align: justify">ℹ️ Red-teaming in AI is an activity where we masquerade
206
+ as evil attackers 😈 and attempt to find vulnerabilities in our AI models. Identifying scenarios where
207
+ our AI models do not work as expected, or fail, is important as it helps us better understand
208
+ their limitations and vulnerabilities when deployed in the real world 🧐</p>''')
209
+ gr.Markdown('''<p style="font-size: 20px; text-align: justify">ℹ️ By attacking our AI models ourselves, we can better understand the risks associated with their use
210
+ in the real world and implement mechanisms which mitigate those risks and protect our model. The example below demonstrates a
211
+ common red-team workflow to assess model vulnerability to evasion attacks ⚔️</p>''')
212
 
213
+ gr.Markdown('''<p style="font-size: 18px; text-align: justify"><i>Check out the full suite of features provided by ART <a href="https://github.com/Trusted-AI/adversarial-robustness-toolbox"
214
+ target="blank_">here</a>.</i></p>''')
215
 
216
+ gr.Markdown('''<hr/>''')
217
+
218
+
219
+ with gr.Row(elem_classes='larger-gap'):
220
+ with gr.Column(scale=1):
221
+ gr.Markdown('''<p style="font-size: 20px; text-align: justify">ℹ️ First, let's set the scene. You have a dataset of images, such as CIFAR-10.</p>''')
222
+ gr.Markdown('''<p style="font-size: 18px; text-align: justify"><i>Note: CIFAR-10 images are low-resolution images spanning 10 different categories, as shown.</i></p>''')
223
+ gr.Markdown('''<p style="font-size: 20px; text-align: justify">ℹ️ Your goal is to have an AI model capable of classifying these images. So you
224
+ train a model on this dataset, or use a pre-trained model from Hugging Face,
225
+ such as Meta's Distilled Data-efficient Image Transformer.</p>''')
226
+ with gr.Column(scale=1):
227
+ gr.Markdown('''
228
+ <p style="font-size: 20px;"><b>Hugging Face dataset:</b>
229
+ <a href="https://huggingface.co/datasets/cifar10" target="_blank">CIFAR-10</a></p>
230
+ <p style="font-size: 18px; padding-left: 20px;"><i>CIFAR-10 labels:</i>
231
+ <i>{airplane, automobile, bird, cat, deer, dog,
232
+ frog, horse, ship, truck}</i>
233
+ </p>
234
+ <p style="font-size: 20px;"><b>Hugging Face model:</b><br/>
235
+ <a href="https://huggingface.co/facebook/deit-tiny-patch16-224"
236
+ target="_blank">facebook/deit-tiny-distilled-patch16-224</a></p>
237
+ <br/>
238
+ <p style="font-size: 20px;">👀 take a look at the sample images from the CIFAR-10 dataset and their respective labels.</p>
239
+ ''')
240
+ with gr.Column(scale=1):
241
+ gr.Gallery(label="CIFAR-10", preview=True, value=sample_CIFAR10())
242
 
243
+ gr.Markdown('''<hr/>''')
244
+
245
+ gr.Markdown('''<p style="text-align: justify">ℹ️ Now as a responsible AI expert, you wish to assert that your model is not vulnerable to
246
+ attacks which might manipulate the prediction, for instance causing ships to be classified as birds. To do this, you will deploy
247
+ adversarial attacks against your own model and assess its performance.</p>''')
248
+
249
+ gr.Markdown('''<p style="text-align: justify">ℹ️ Below are two common types of evasion attack. Both create adversarial images which, at first glance, look the same as the original images,
250
+ however they contain subtle changes which cause the AI model to make incorrect predictions.</p><br/>''')
251
+
252
 
253
+ with gr.Accordion("Projected Gradient Descent", open=False):
254
+ gr.Markdown('''This attack uses the PGD optimization algorithm to identify the optimal perturbations
255
+ to add to an image (i.e. changing pixel values) to cause the model to misclassify images. See more
256
+ <a href="https://github.com/Trusted-AI/adversarial-robustness-toolbox"
257
+ target="blank_">here</a>.''')
258
 
259
+ with gr.Row():
260
 
261
+ with gr.Column(scale=1):
262
+ attack = gr.Textbox(visible=True, value="PGD", label="Attack", interactive=False)
263
+ max_iter = gr.Slider(minimum=1, maximum=10, label="Max iterations", value=4)
264
+ eps = gr.Slider(minimum=0.0001, maximum=1, label="Epsilon", value=0.03)
265
+ eps_steps = gr.Slider(minimum=0.0001, maximum=1, label="Epsilon steps", value=0.003)
266
+ bt_eval_pgd = gr.Button("Evaluate")
267
 
268
+ # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
269
+ with gr.Column(scale=5):
270
+ with gr.Row(elem_classes='symbols'):
271
+ with gr.Column(scale=10):
272
+ gr.Markdown('''<p style="font-size: 18px"><i>The unmodified, original CIFAR-10 images, with model predictions.</i></p><br>''')
273
+ original_gallery = gr.Gallery(label="Original", preview=False, show_download_button=True)
274
+ benign_output = gr.Label(num_top_classes=3, visible=False)
275
+ clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
276
+ with gr.Column(scale=1, min_width='0px', elem_classes='symbols'):
277
+ gr.Markdown('''➕''')
278
+ with gr.Column(scale=10):
279
+ gr.Markdown('''<p style="font-size: 18px"><i>Visual representation of the calculated perturbations for attacking the model (black pixels indicate little to no perturbation).</i></p>''')
280
+ delta_gallery = gr.Gallery(label="Added perturbation", preview=False, show_download_button=True)
281
+ with gr.Column(scale=1, min_width='0px'):
282
+ gr.Markdown('''🟰''', elem_classes='symbols')
283
+ with gr.Column(scale=10):
284
+ gr.Markdown('''<p style="font-size: 18px"><i>The original image (with optimized perturbations applied) gives us an adversarial image which fools the model.</i></p>''')
285
+ adversarial_gallery = gr.Gallery(label="Adversarial", preview=False, show_download_button=True)
286
+ adversarial_output = gr.Label(num_top_classes=3, visible=False)
287
+ robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
288
 
289
+ bt_eval_pgd.click(clf_evasion_evaluate, inputs=[attack, max_iter, eps, eps_steps, attack, attack, attack, attack],
290
+ outputs=[original_gallery, adversarial_gallery, delta_gallery, clean_accuracy,
291
+ robust_accuracy])
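The wiring above only passes the PGD sliders into clf_evasion_evaluate; the attack object itself is built in unchanged context lines that this diff does not show. A minimal sketch of how that elided step is typically assembled with ART, assuming the hf_model classifier and the x_subset/y_subset arrays defined earlier in the function (parameter names mirror the sliders and are illustrative rather than the commit's exact code):

```python
# Hedged sketch only: approximate shape of the elided PGD evaluation step.
from art.attacks.evasion import ProjectedGradientDescent

pgd = ProjectedGradientDescent(
    estimator=hf_model,                # HuggingFaceClassifierPyTorch wrapping the DeiT model
    max_iter=int(attack_max_iter),     # "Max iterations" slider
    eps=float(attack_eps),             # "Epsilon" slider: L-inf perturbation budget
    eps_step=float(attack_eps_steps),  # "Epsilon steps" slider
)
x_adv = pgd.generate(x=x_subset)                            # adversarial images in [0, 1]
outputs = hf_model.predict(x_adv)
adv_acc = np.mean(np.argmax(outputs, axis=1) == y_subset)   # reported as "Robust Accuracy"
```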
292
+
293
+ gr.Markdown('''<br/>''')
294
+
295
+ with gr.Accordion("Adversarial Patch", open=False):
296
+ gr.Markdown('''This attack optimizes pixels in a patch which can be overlaid on an image, causing a model to misclassify. See more
297
+ <a href="https://github.com/Trusted-AI/adversarial-robustness-toolbox"
298
+ target="blank_">here</a>.''')
299
+
300
+ with gr.Row():
301
+
302
+ with gr.Column(scale=1):
303
+ attack = gr.Textbox(visible=True, value="Adversarial Patch", label="Attack", interactive=False)
304
+ max_iter = gr.Slider(minimum=1, maximum=1000, label="Max iterations", value=10)
305
+ x_location = gr.Slider(minimum=1, maximum=32, label="Location (x)", value=1)
306
+ y_location = gr.Slider(minimum=1, maximum=32, label="Location (y)", value=1)
307
+ patch_height = gr.Slider(minimum=1, maximum=32, label="Patch height", value=12)
308
+ patch_width = gr.Slider(minimum=1, maximum=32, label="Patch width", value=12)
309
+ eval_btn_patch = gr.Button("Evaluate")
310
 
311
+ # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
312
+ with gr.Column(scale=3):
313
+ with gr.Row(elem_classes='symbols'):
314
+ with gr.Column(scale=10):
315
+ gr.Markdown('''<p style="font-size: 18px"><i>The unmodified, original CIFAR-10 images, with model predictions.</i></p><br>''')
316
+ original_gallery = gr.Gallery(label="Original", preview=False, show_download_button=True)
317
  clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
318
 
319
+ with gr.Column(scale=1, min_width='0px', elem_classes='symbols'):
320
+ gr.Markdown('''➕''')
321
+
322
+ with gr.Column(scale=10):
323
+ gr.Markdown('''<p style="font-size: 18px"><i>Visual representation of the optimized patch for attacking the model.</i></p><br>''')
324
+ delta_gallery = gr.Gallery(label="Patches", preview=True, show_download_button=True)
325
+
326
+ with gr.Column(scale=1, min_width='0px'):
327
+ gr.Markdown('''🟰''', elem_classes='symbols')
328
+
329
+ with gr.Column(scale=10):
330
+ gr.Markdown('''<p style="font-size: 18px"><i>The original image (with optimized perturbations applied) gives us an adversarial image which fools the model.</i></p>''')
331
+ adversarial_gallery = gr.Gallery(label="Adversarial", preview=False, show_download_button=True)
332
+ robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
333
+
334
+ eval_btn_patch.click(clf_evasion_evaluate, inputs=[attack, max_iter, eps, eps_steps, x_location, y_location, patch_height,
335
+ patch_width],
336
+ outputs=[original_gallery, adversarial_gallery, delta_gallery, clean_accuracy,
337
+ robust_accuracy])
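As with PGD, the Adversarial Patch branch of clf_evasion_evaluate is elided from this diff. A hedged sketch of how the slider values would typically map onto ART's patch attack (the constructor arguments shown are assumptions based on the sliders; rotation and scale defaults are omitted):

```python
# Hedged sketch only: approximate shape of the elided adversarial-patch step.
from art.attacks.evasion import AdversarialPatchPyTorch

patch_attack = AdversarialPatchPyTorch(
    estimator=hf_model,
    max_iter=int(attack_max_iter),                          # "Max iterations" slider
    patch_shape=(3, int(patch_height), int(patch_width)),   # patch size sliders
    patch_location=(int(x_location), int(y_location)),      # location sliders
    patch_type="square",
    targeted=False,
)
patch, _ = patch_attack.generate(x=x_subset, y=y_subset)    # optimised patch and mask
x_adv = patch_attack.apply_patch(x_subset, scale=1.0)       # patched images for the gallery
```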
338
+
339
+ gr.Markdown('''<br/>''')
340
+
341
  if __name__ == "__main__":
342
 
343
  # For development
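The launch call sits below the truncation point of this diff. For a gr.Blocks app such as demo, the development entry point is conventionally a plain launch; the commit's actual arguments are not shown, so this is only a sketch:

```python
# Hedged sketch: typical development launch for the Blocks app defined above.
demo.launch()
```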
requirements.txt CHANGED
@@ -7,4 +7,4 @@ tensorflow==2.10.1; sys_platform != "darwin"
  tensorflow-macos; sys_platform == "darwin"
  tensorflow-metal; sys_platform == "darwin"
  adversarial-robustness-toolbox
- gradio==4.2
+ gradio==4.14