adaptive
Browse files- index.html +43 -2
- static/css/custom.css +44 -1
index.html
CHANGED
@@ -473,7 +473,7 @@
|
|
473 |
|
474 |
|
475 |
<div class="columns is-centered">
|
476 |
-
<div class="column">
|
477 |
<div class="content">
|
478 |
<h2 class="title is-4">Performance against Adaptive Attacks</h2>
|
479 |
</div>
|
@@ -482,7 +482,48 @@
|
|
482 |
<h2 class="title is-4">Contribution of Representation Similarity & Label Con-
|
483 |
sistency against Adaptive Attacks</h2>
|
484 |
</div>
|
485 |
-
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
486 |
</div>
|
487 |
</div>
|
488 |
|
|
|
473 |
|
474 |
|
475 |
<div class="columns is-centered">
|
476 |
+
<!-- <div class="column">
|
477 |
<div class="content">
|
478 |
<h2 class="title is-4">Performance against Adaptive Attacks</h2>
|
479 |
</div>
|
|
|
482 |
<h2 class="title is-4">Contribution of Representation Similarity & Label Con-
|
483 |
sistency against Adaptive Attacks</h2>
|
484 |
</div>
|
485 |
+
</div> -->
|
486 |
+
<div id="adaptive-demo" class="container">
|
487 |
+
<div class="row align-items-center">
|
488 |
+
<div class="row" style="margin: 10px 0 0">
|
489 |
+
<div class="models-list">
|
490 |
+
<span style="margin-right: 1em;">Models</span>
|
491 |
+
<span class="radio-group"><input type="radio" id="LLaMA2" class="options" name="models" value="llama2_7b_chat" checked="" /><label for="LLaMA2" class="option-label">LLaMA-2-7B-Chat</label></span>
|
492 |
+
<span class="radio-group"><input type="radio" id="Vicuna" class="options" name="models" value="vicuna_7b_v1.5" /><label for="Vicuna" class="option-label">Vicuna-7B-V1.5</label></span>
|
493 |
+
</div>
|
494 |
+
</div>
|
495 |
+
</div>
|
496 |
+
<div class="row align-items-center">
|
497 |
+
<div class="col-4">
|
498 |
+
<div id="defense-methods">
|
499 |
+
<!-- Defense option "ppl": the visible label names the option this radio selects (was placeholder text "CIFAR-10"). -->
<div class="row align-items-center"><input type="radio" id="defense_ppl" class="options" name="defense" value="ppl" /><label for="defense_ppl" class="defense">PPL</label></div>
|
500 |
+
<!-- Defense option "smoothllm": the visible label names the option this radio selects (was placeholder text "CIFAR-100"). -->
<div class="row align-items-center"><input type="radio" id="defense_smoothllm" class="options" name="defense" value="smoothllm" /><label for="defense_smoothllm" class="defense">SmoothLLM</label></div>
|
501 |
+
<!-- Defense option "erase_check": the visible label names the option this radio selects (was placeholder text "ImageNet"). -->
<div class="row align-items-center"><input type="radio" id="defense_erase_check" class="options" name="defense" value="erase_check" /><label for="defense_erase_check" class="defense">Erase-Check</label></div>
|
502 |
+
</div>
|
503 |
+
<div class="row align-items-center">
|
504 |
+
<div class="attack-success-rate"><span class="jailbreak-metric">Average Malicious Refusal Rate</span><span class="attack-success-rate-value" id="asr-value">0.959</span></div>
|
505 |
+
</div>
|
506 |
+
<div class="row align-items-center">
|
507 |
+
<div class="benign-refusal-rate"><span class="jailbreak-metric">Benign Refusal Rate</span><span class="benign-refusal-rate-value" id="brr-value">0.050</span></div>
|
508 |
+
</div>
|
509 |
+
</div>
|
510 |
+
<div class="col-8">
|
511 |
+
<figure class="figure">
|
512 |
+
<!-- Result figure for the demo. The alt text describes the initially loaded image (see src); NOTE(review): if a script swaps src when the model/defense/threshold changes, it should update alt as well. -->
<img id="reliability-diagram" src="demo_results/gradient_cuff_llama2_7b_chat_threshold_100.png" alt="Gradient Cuff result for LLaMA-2-7B-Chat at threshold 100" />
|
513 |
+
<div class="slider-container">
|
514 |
+
<div class="slider-label"><span>Perplexity Threshold</span></div>
|
515 |
+
<div class="slider-content" id="ppl-slider"><div id="ppl-threshold" class="ui-slider-handle"></div></div>
|
516 |
+
</div>
|
517 |
+
<div class="slider-container">
|
518 |
+
<div class="slider-label"><span>Gradient Threshold</span></div>
|
519 |
+
<div class="slider-content" id="gradient-norm-slider"><div id="gradient-norm-threshold" class="slider-value ui-slider-handle"></div></div>
|
520 |
+
</div>
|
521 |
+
<figcaption class="figure-caption">
|
522 |
+
</figcaption>
|
523 |
+
</figure>
|
524 |
+
</div>
|
525 |
+
</div>
|
526 |
+
</div>
|
527 |
</div>
|
528 |
</div>
|
529 |
|
static/css/custom.css
CHANGED
@@ -4,4 +4,47 @@
|
|
4 |
|
5 |
#adaptive-loss-formula .formula-list a { display: inline-block; width: 250px; margin: 0 20px; padding: 8px 10px; text-align: center; background: #DDD; cursor: pointer; text-decoration: none; color: #333; border-radius: 10px; user-select: none; transition-duration: 0.3s; }
|
6 |
|
7 |
-
#adaptive-loss-formula .formula-list a:hover, #
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
#adaptive-loss-formula .formula-list a { display: inline-block; width: 250px; margin: 0 20px; padding: 8px 10px; text-align: center; background: #DDD; cursor: pointer; text-decoration: none; color: #333; border-radius: 10px; user-select: none; transition-duration: 0.3s; }
|
6 |
|
7 |
+
#adaptive-loss-formula .formula-list a:hover, #adaptive-demo #defense-methods .defense:hover { background: #555; color: #FFF; }
|
8 |
+
|
9 |
+
|
10 |
+
#adaptive-demo #defense-methods .options:checked ~ .defense { color: #FFF; background: #555; }
|
11 |
+
|
12 |
+
#adaptive-demo #defense-methods .defense { display: inline-block; width: 60%; margin: 2% auto 8%; padding: 8px 10px; text-align: center; background: #DDD; cursor: pointer; text-decoration: none; color: #333; border-radius: 10px; user-select: none; transition-duration: 0.3s; }
|
13 |
+
|
14 |
+
#adaptive-demo .legend { text-align: center; width: 70%; margin: 0 auto; }
|
15 |
+
|
16 |
+
#adaptive-demo .figure-option { text-align: center; width: 70%; margin: 4% auto 0; /* Customize the label (the container) */ /* Hide the browser's default checkbox */ /* Create a custom checkbox */ /* On mouse-over, add a grey background color */ /* When the checkbox is checked, add a blue background */ /* Create the checkmark/indicator (hidden when not checked) */ /* Show the checkmark when checked */ /* Style the checkmark/indicator */ }
|
17 |
+
#adaptive-demo .figure-option .container { display: block; position: relative; padding-left: 35px; margin-bottom: 12px; cursor: pointer; font-size: 22px; -webkit-user-select: none; -moz-user-select: none; -ms-user-select: none; user-select: none; }
|
18 |
+
#adaptive-demo .figure-option .container input { position: absolute; opacity: 0; cursor: pointer; height: 0; width: 0; }
|
19 |
+
#adaptive-demo .figure-option .checkmark { position: absolute; top: 4px; left: 8px; height: 25px; width: 25px; background-color: #eee; }
|
20 |
+
#adaptive-demo .figure-option .container:hover input ~ .checkmark { background-color: #ccc; }
|
21 |
+
#adaptive-demo .figure-option .container input:checked ~ .checkmark { background-color: #9b9bff; }
|
22 |
+
#adaptive-demo .figure-option .checkmark:after { content: ""; position: absolute; display: none; }
|
23 |
+
#adaptive-demo .figure-option .container input:checked ~ .checkmark:after { display: block; }
|
24 |
+
#adaptive-demo .figure-option .container .checkmark:after { left: 9px; top: 5px; width: 5px; height: 10px; border: solid white; border-width: 0 3px 3px 0; -webkit-transform: rotate(45deg); -ms-transform: rotate(45deg); transform: rotate(45deg); }
|
25 |
+
|
26 |
+
#adaptive-demo .figure { margin: 0 auto; display: block; }
|
27 |
+
|
28 |
+
#adaptive-demo .figure #original { display: none; }
|
29 |
+
|
30 |
+
#adaptive-demo .figure img { user-drag: none; -webkit-user-drag: none; user-select: none; -khtml-user-drag: none; -moz-user-drag: none; -o-user-drag: none; pointer-events: none; position: relative; left: 35px; }
|
31 |
+
|
32 |
+
#adaptive-demo .figure-caption { width: 240px; text-align: center; display: block; margin: 0 auto; padding: 10px 0 0; font-size: .8em; }
|
33 |
+
|
34 |
+
#adaptive-demo .figure-caption ul { padding-left: 0; }
|
35 |
+
|
36 |
+
#adaptive-demo .figure-caption ul li { list-style: none; }
|
37 |
+
|
38 |
+
#adaptive-demo .figure-caption .model-prediction { font-weight: bold; }
|
39 |
+
|
40 |
+
#adaptive-demo .figure-caption .correct { color: #009926; }
|
41 |
+
|
42 |
+
#adaptive-demo .figure-caption .wrong { color: #e31327; }
|
43 |
+
|
44 |
+
#adaptive-demo .attack-success-rate { display: inline-block; width: 60%; margin: 2% auto 8%; padding: 8px 10px; text-align: center; text-decoration: none; background: #DDD; color: #333; border-radius: 10px; user-select: none; }
|
45 |
+
#adaptive-demo .attack-success-rate .jailbreak-metric { font-size: 0.75em; display: block; }
|
46 |
+
/* Numeric readout inside the .attack-success-rate box. The generic family keyword must be unquoted: "sans-serif" in quotes is treated as a font literally named sans-serif. */
#adaptive-demo .attack-success-rate .attack-success-rate-value { font-size: 1.5em; font-family: sans-serif; color: #820000; }
|
47 |
+
|
48 |
+
#adaptive-demo .benign-refusal-rate { display: inline-block; width: 60%; margin: 2% auto 8%; padding: 8px 10px; text-align: center; text-decoration: none; background: #DDD; color: #333; border-radius: 10px; user-select: none; }
|
49 |
+
#adaptive-demo .benign-refusal-rate .jailbreak-metric { font-size: 0.75em; display: block; }
|
50 |
+
/* Numeric readout inside the .benign-refusal-rate box. The generic family keyword must be unquoted: "sans-serif" in quotes is treated as a font literally named sans-serif. */
#adaptive-demo .benign-refusal-rate .benign-refusal-rate-value { font-size: 1.5em; font-family: sans-serif; color: #820000; }
|