Spaces:
Sleeping
Sleeping
'''
ART Gradio Example App [Evasion]

To run:
- clone the repository
- execute: gradio examples/gradio_app.py or python examples/gradio_app.py
- navigate to the local URL, e.g. http://127.0.0.1:7860
'''
import gradio as gr | |
import numpy as np | |
from carbon_theme import Carbon | |
import numpy as np | |
import torch | |
import transformers | |
from art.estimators.classification.hugging_face import HuggingFaceClassifierPyTorch | |
from art.attacks.evasion import ProjectedGradientDescentPyTorch, AdversarialPatchPyTorch | |
from art.utils import load_dataset | |
from art.attacks.poisoning import PoisoningAttackBackdoor | |
from art.attacks.poisoning.perturbations import insert_image | |
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Custom stylesheet passed to gr.Blocks. Classes such as `larger-gap` and
# `symbols` are attached to components through `elem_classes`; the last rule
# removes the minimum width from the matched svelte containers.
css = """
.center-text { text-align: center !important }
.larger-gap { gap: 100px !important; }
.symbols { text-align: center !important; margin: auto !important; }
div.svelte-15lo0d8>*, div.svelte-15lo0d8>.form > * {
min-width: 0px !important;
}
"""
def sample_CIFAR10():
    '''
    Build the (image, caption) pairs shown in the CIFAR-10 preview gallery.

    Loads CIFAR-10 through ART's `load_dataset` and pairs each of the first ten
    training images with the name of its class.

    Returns a list of `(image, label_name)` tuples. Each image is a float32
    array in the same axis order as returned by `load_dataset`
    (presumably height x width x channels - confirm against ART).
    '''
    label_names = [
        'airplane',
        'automobile',
        'bird',
        'cat',
        'deer',
        'dog',
        'frog',
        'horse',
        'ship',
        'truck',
    ]
    num_samples = 10

    (x_train, y_train), (_, _), _, _ = load_dataset('cifar10')

    # Only the displayed samples are converted. Casting and transposing the
    # whole training set (and transposing each image straight back) would copy
    # every image without changing the result.
    images = x_train[:num_samples].astype(np.float32)
    # Labels are reduced with argmax, i.e. treated as one score per class.
    labels = np.argmax(y_train[:num_samples], axis=1)

    return [(image, label_names[label]) for image, label in zip(images, labels)]
def _captioned_gallery(images, logits, label_names):
    '''
    Pair each channels-first image with the name of its highest-scoring class.

    Returns a list of `(image, label_name)` tuples, with every image moved to
    channels-last order for display.
    '''
    predictions = np.argmax(logits, axis=1)
    return [(image.transpose(1, 2, 0), label_names[prediction])
            for image, prediction in zip(images, predictions)]


def clf_evasion_evaluate(*args):
    '''
    Run a classification task evaluation

    Positional arguments (in the order the UI supplies them):
        0: attack name, either "PGD" or "Adversarial Patch"
        1: maximum number of attack iterations
        2: epsilon (used by PGD and by the perturbation visualisation)
        3: epsilon step (PGD only)
        4: patch location x   (Adversarial Patch only)
        5: patch location y   (Adversarial Patch only)
        6: patch height       (Adversarial Patch only)
        7: patch width        (Adversarial Patch only)

    Arguments 4-7 are read only for the patch attack, so placeholder values
    may be passed in those positions when running PGD.

    Returns a tuple `(benign_gallery, adversarial_gallery, delta_gallery,
    clean_accuracy, adversarial_accuracy)`.

    Raises ValueError when the attack name is not supported.
    '''
    attack = args[0]
    supported_attacks = ("PGD", "Adversarial Patch")
    # Fail fast, before the model and dataset are loaded, instead of hitting an
    # UnboundLocalError at the return statement.
    if attack not in supported_attacks:
        raise ValueError(
            f"Unsupported attack {attack!r}; expected one of {supported_attacks}."
        )

    # UI sliders can deliver float values; iteration counts must be integers.
    attack_max_iter = int(round(args[1]))
    attack_eps = args[2]
    attack_eps_steps = args[3]

    label_names = [
        'airplane',
        'automobile',
        'bird',
        'cat',
        'deer',
        'dog',
        'frog',
        'horse',
        'ship',
        'truck',
    ]

    model = transformers.AutoModelForImageClassification.from_pretrained(
        'facebook/deit-tiny-distilled-patch16-224',
        ignore_mismatched_sizes=True,
        num_labels=10
    )
    # 32x32 inputs are upsampled 7x to 224x224 before reaching the model.
    upsampler = torch.nn.Upsample(scale_factor=7, mode='nearest')
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    loss_fn = torch.nn.CrossEntropyLoss()
    hf_model = HuggingFaceClassifierPyTorch(
        model=model,
        loss=loss_fn,
        optimizer=optimizer,
        input_shape=(3, 32, 32),
        nb_classes=10,
        clip_values=(0, 1),
        processor=upsampler
    )
    model_checkpoint_path = './state_dicts/deit_cifar_base_model.pt'
    hf_model.model.load_state_dict(torch.load(model_checkpoint_path, map_location=device))

    (x_train, y_train), (_, _), _, _ = load_dataset('cifar10')
    y_train = np.argmax(y_train, axis=1)

    # Pick the first `samples_per_class` images of every class. Indices are
    # selected first so that only the chosen images are transposed and cast.
    samples_per_class = 1
    chosen = np.concatenate([
        np.flatnonzero(y_train == c)[:samples_per_class]
        for c in np.unique(y_train)
    ])
    x_subset = np.transpose(x_train[chosen], (0, 3, 1, 2)).astype(np.float32)
    y_subset = y_train[chosen]

    outputs = hf_model.predict(x_subset)
    clean_preds = np.argmax(outputs, axis=1)
    clean_acc = np.mean(clean_preds == y_subset)
    benign_gallery_out = _captioned_gallery(x_subset, outputs, label_names)

    if attack == "PGD":
        attacker = ProjectedGradientDescentPyTorch(hf_model, max_iter=attack_max_iter,
                                                   eps=attack_eps, eps_step=attack_eps_steps)
        x_adv = attacker.generate(x_subset)

        # shift to 0 and make perturbations 10x larger to visualise them
        delta = ((x_subset - x_adv) + attack_eps) * 10
        if delta.max() > 1:
            lowest, highest = np.min(delta), np.max(delta)
            # A constant perturbation has zero range; skip the rescale rather
            # than divide by zero (the clip below still bounds the values).
            if highest > lowest:
                delta = (delta - lowest) / (highest - lowest)
        delta_gallery_out = np.clip(delta, 0, 1).transpose(0, 2, 3, 1)
    else:  # "Adversarial Patch"
        # Patch geometry is converted here only: when running PGD these
        # positions may hold placeholders.
        x_location = int(round(args[4]))
        y_location = int(round(args[5]))
        patch_height = int(round(args[6]))
        patch_width = int(round(args[7]))

        scale_min = 0.3
        scale_max = 1.0
        rotation_max = 0
        learning_rate = 5000.
        attacker = AdversarialPatchPyTorch(hf_model, scale_max=scale_max,
                                           scale_min=scale_min,
                                           rotation_max=rotation_max,
                                           learning_rate=learning_rate,
                                           max_iter=attack_max_iter, patch_type='square',
                                           patch_location=(x_location, y_location),
                                           patch_shape=(3, patch_height, patch_width))
        patch, _ = attacker.generate(x_subset)
        x_adv = attacker.apply_patch(x_subset, scale=0.3)

        # The optimised patch itself is what gets displayed for this attack.
        delta_gallery_out = np.expand_dims(patch, 0).transpose(0, 2, 3, 1)

    outputs = hf_model.predict(x_adv)
    adv_preds = np.argmax(outputs, axis=1)
    adv_acc = np.mean(adv_preds == y_subset)
    adv_gallery_out = _captioned_gallery(x_adv, outputs, label_names)

    return benign_gallery_out, adv_gallery_out, delta_gallery_out, clean_acc, adv_acc
def show_params(type):
    '''
    Return a Column update: visible for any selection other than "Example",
    hidden when the selection is "Example".
    '''
    is_visible = type != "Example"
    return gr.Column(visible=is_visible)
# e.g. To use a local alternative theme: carbon_theme = Carbon()
# NOTE: carbon_theme is instantiated but the Blocks below select a theme by name.
carbon_theme = Carbon()

# Page layout: header, introduction, dataset/model overview, then one accordion
# per attack. Each accordion holds the attack controls, three result galleries
# and an "Evaluate" button wired to clf_evasion_evaluate.
with gr.Blocks(css=css, theme='Tshackelton/IBMPlex-DenseReadable') as demo:
    import art
    text = art.__version__

    with gr.Row(elem_classes="custom-text"):
        with gr.Column(scale=1,):
            gr.Image(value="./art_lfai.png", show_label=False, show_download_button=False, width=100, show_share_button=False)
        with gr.Column(scale=2):
            gr.Markdown("<h1>⚔️ Red-teaming HuggingFace with ART [Evasion]</h1>", elem_classes="plot-padding")

    gr.Markdown('''<p style="font-size: 20px; text-align: justify">ℹ️ Red-teaming in AI is an activity where we masquerade
as evil attackers 😈 and attempt to find vulnerabilities in our AI models. Identifying scenarios where
our AI models do not work as expected, or fail, is important as it helps us better understand
its limitations and vulnerability when deployed in the real world 🧐</p>''')
    gr.Markdown('''<p style="font-size: 20px; text-align: justify">ℹ️ By attacking our AI models ourselves, we can better understand the risks associated with use
in the real world and implement mechanisms which can mitigate and protect our model. The example below demonstrates a
common red-team workflow to assess model vulnerability to evasion attacks ⚔️</p>''')
    gr.Markdown('''<p style="font-size: 18px; text-align: justify"><i>Check out the full suite of features provided by ART <a href="https://github.com/Trusted-AI/adversarial-robustness-toolbox"
target="_blank">here</a>. To dive further into evasion attacks with Hugging Face and ART, check out our
<a href="https://github.com/Trusted-AI/adversarial-robustness-toolbox/blob/main/notebooks/hugging_face_evasion.ipynb"
target="_blank">notebook</a>. Also feel free to contribute and give our repo a ⭐.</i></p>''')
    gr.Markdown('''<hr/>''')

    with gr.Row(elem_classes=["larger-gap", "custom-text"]):
        with gr.Column(scale=1):
            gr.Markdown('''<p style="font-size: 20px; text-align: justify">ℹ️ First let's set the scene. You have a dataset of images, such as CIFAR-10.</p>''')
            gr.Markdown('''<p style="font-size: 18px; text-align: justify"><i>Note: CIFAR-10 images are low resolution images which span 10 different categories as shown.</i></p>''')
            gr.Markdown('''<p style="font-size: 20px; text-align: justify">ℹ️ Your goal is to have an AI model capable of classifying these images. So you
train a model on this dataset, or use a pre-trained model from Hugging Face,
such as Meta's Distilled Data-efficient Image Transformer.</p>''')
        with gr.Column(scale=1):
            # The model link targets the distilled checkpoint, matching both the
            # link text and the checkpoint loaded by the evaluation code.
            gr.Markdown('''
<p style="font-size: 20px;"><b>Hugging Face dataset:</b>
<a href="https://huggingface.co/datasets/cifar10" target="_blank">CIFAR-10</a></p>
<p style="font-size: 18px; padding-left: 20px;"><i>CIFAR-10 labels:</i>
<i>{airplane, automobile, bird, cat, deer, dog,
frog, horse, ship, truck}</i>
</p>
<p style="font-size: 20px;"><b>Hugging Face model:</b><br/>
<a href="https://huggingface.co/facebook/deit-tiny-distilled-patch16-224"
target="_blank">facebook/deit-tiny-distilled-patch16-224</a></p>
<br/>
<p style="font-size: 20px;">👀 take a look at the sample images from the CIFAR-10 dataset and their respective labels.</p>
''')
        with gr.Column(scale=1):
            gr.Gallery(label="CIFAR-10", preview=True, value=sample_CIFAR10(), height=420)

    gr.Markdown('''<hr/>''')
    gr.Markdown('''<p style="text-align: justify; font-size: 18px">ℹ️ Now as a responsible AI expert, you wish to assert that your model is not vulnerable to
attacks which might manipulate the prediction. For instance, ships become classified as birds. To do this, you will deploy
adversarial attacks against your own model and assess its performance.</p>''')
    gr.Markdown('''<p style="text-align: justify; font-size: 18px">ℹ️ Below are two common types of evasion attack. Both create adversarial images, which at first glance, seem the same as the original images,
however they contain subtle changes which cause the AI model to make incorrect predictions.</p><br/>''')

    with gr.Accordion("Projected Gradient Descent", open=False, elem_classes="custom-text"):
        gr.Markdown('''This attack uses the PGD optimization algorithm to identify the optimal perturbations
to add to an image (i.e. changing pixel values) to cause the model to misclassify images. See more
<a href="https://github.com/Trusted-AI/adversarial-robustness-toolbox"
target="_blank">here</a>.''')
        with gr.Row():
            with gr.Column(scale=1):
                attack = gr.Textbox(visible=True, value="PGD", label="Attack", interactive=False)
                max_iter = gr.Slider(minimum=1, maximum=10, label="Max iterations", value=4)
                eps = gr.Slider(minimum=0.0001, maximum=1, label="Epsilon", value=0.3)
                eps_steps = gr.Slider(minimum=0.0001, maximum=1, label="Epsilon steps", value=0.03)
                bt_eval_pgd = gr.Button("Evaluate")
            # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
            with gr.Column(scale=5):
                with gr.Row(elem_classes='symbols'):
                    with gr.Column(scale=10):
                        gr.Markdown('''<p style="font-size: 18px"><i>The unmodified, original CIFAR-10 images, with model predictions.</i></p><br>''')
                        original_gallery = gr.Gallery(label="Original", preview=False, show_download_button=True)
                        benign_output = gr.Label(num_top_classes=3, visible=False)
                        clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
                    with gr.Column(scale=1, min_width=0, elem_classes='symbols'):
                        gr.Markdown('''➕''')
                    with gr.Column(scale=10):
                        gr.Markdown('''<p style="font-size: 18px"><i>Visual representation of the calculated perturbations for attacking the model (black pixels indicate little to no perturbation).</i></p>''')
                        delta_gallery = gr.Gallery(label="Added perturbation", preview=False, show_download_button=True)
                    with gr.Column(scale=1, min_width=0):
                        gr.Markdown('''🟰''', elem_classes='symbols')
                    with gr.Column(scale=10):
                        gr.Markdown('''<p style="font-size: 18px"><i>The original image (with optimized perturbations applied) gives us an adversarial image which fools the model.</i></p>''')
                        adversarial_gallery = gr.Gallery(label="Adversarial", preview=False, show_download_button=True)
                        adversarial_output = gr.Label(num_top_classes=3, visible=False)
                        robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
        # PGD has no patch geometry, so the `attack` textbox is repeated to fill
        # the four patch-related positions the handler accepts.
        bt_eval_pgd.click(clf_evasion_evaluate, inputs=[attack, max_iter, eps, eps_steps, attack, attack, attack, attack],
                          outputs=[original_gallery, adversarial_gallery, delta_gallery, clean_accuracy,
                                   robust_accuracy])

    gr.Markdown('''<br/>''')

    with gr.Accordion("Adversarial Patch", open=False, elem_classes="custom-text"):
        gr.Markdown('''This attack optimizes pixels in a patch which can be overlayed on an image, causing a model to misclassify. See more
<a href="https://github.com/Trusted-AI/adversarial-robustness-toolbox"
target="_blank">here</a>.''')
        with gr.Row():
            with gr.Column(scale=1):
                # `attack` and `max_iter` are rebound here; the PGD handler above
                # already captured its own components.
                attack = gr.Textbox(visible=True, value="Adversarial Patch", label="Attack", interactive=False)
                max_iter = gr.Slider(minimum=1, maximum=1000, label="Max iterations", value=10)
                x_location = gr.Slider(minimum=1, maximum=32, label="Location (x)", value=1)
                y_location = gr.Slider(minimum=1, maximum=32, label="Location (y)", value=1)
                patch_height = gr.Slider(minimum=1, maximum=32, label="Patch height", value=12)
                patch_width = gr.Slider(minimum=1, maximum=32, label="Patch width", value=12)
                eval_btn_patch = gr.Button("Evaluate")
            # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
            with gr.Column(scale=3):
                with gr.Row(elem_classes='symbols'):
                    with gr.Column(scale=10):
                        gr.Markdown('''<p style="font-size: 18px"><i>The unmodified, original CIFAR-10 images, with model predictions.</i></p><br><br>''')
                        original_gallery = gr.Gallery(label="Original", preview=False, show_download_button=True)
                        clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
                    with gr.Column(scale=1, min_width=0, elem_classes='symbols'):
                        gr.Markdown('''➕''')
                    with gr.Column(scale=10):
                        gr.Markdown('''<p style="font-size: 18px"><i>Visual representation of the optimized patch for attacking the model.</i></p><br><br>''')
                        delta_gallery = gr.Gallery(label="Patches", preview=True, show_download_button=True)
                    with gr.Column(scale=1, min_width=0):
                        gr.Markdown('''🟰''', elem_classes='symbols')
                    with gr.Column(scale=10):
                        gr.Markdown('''<p style="font-size: 18px"><i>The original image (with optimized perturbations applied) gives us an adversarial image which fools the model.</i></p>''')
                        adversarial_gallery = gr.Gallery(label="Adversarial", preview=False, show_download_button=True)
                        robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
        # `eps` and `eps_steps` are the sliders from the PGD accordion; they
        # fill the epsilon positions of the shared handler.
        eval_btn_patch.click(clf_evasion_evaluate, inputs=[attack, max_iter, eps, eps_steps, x_location, y_location, patch_height,
                                                           patch_width],
                             outputs=[original_gallery, adversarial_gallery, delta_gallery, clean_accuracy,
                                      robust_accuracy])

    gr.Markdown('''<br/>''')
if __name__ == "__main__":
    # For development, launch with explicit server settings instead:
    #   demo.launch(show_api=False, debug=True, share=False,
    #               server_name="0.0.0.0",
    #               server_port=7777,
    #               ssl_verify=False,
    #               max_threads=20)

    # For deployment
    demo.launch()