Spaces:
Runtime error
Runtime error
File size: 6,519 Bytes
d5b2eed bce177f d5b2eed bce177f 82b7df7 81a18f0 829775d bce177f 82b7df7 bce177f e3032e8 d5b2eed bce177f 0b9ecb2 e91bd7c d5b2eed e91bd7c d5b2eed e3032e8 d5b2eed e3032e8 5a5a81e 0b9ecb2 d5b2eed fb34e92 d5b2eed fb34e92 d5b2eed 0b9ecb2 e91bd7c e3e024d 29025ba 40bc8d5 5a5a81e 40bc8d5 e91bd7c d5b2eed e3032e8 d5b2eed bce177f 29025ba 829775d 1d91315 d5b2eed bce177f d5b2eed e91bd7c bce177f d5b2eed a54b97e bce177f 963adc8 df77487 963adc8 5c5d2d9 40bc8d5 963adc8 40bc8d5 829775d 963adc8 df77487 5a5a81e 1d91315 829775d 40bc8d5 bce177f e91bd7c d5b2eed bce177f 29025ba 1d91315 029862d d5b2eed bce177f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# Basic example for doing model-in-the-loop dynamic adversarial data collection
# using Gradio Blocks.
import os
import random
from urllib.parse import parse_qs
import gradio as gr
import requests
from transformers import pipeline
from huggingface_hub import Repository
from dotenv import load_dotenv
from pathlib import Path
import json
from filelock import FileLock
# Settings for persisting the mturk HITs to a Hugging Face dataset repo.
# A local ".env" file, when one exists, is handed to load_dotenv before the
# environment variables below are read.
if os.path.isfile(".env"):
    load_dotenv(".env")

DATASET_REPO_URL = os.environ.get("DATASET_REPO_URL")
HF_TOKEN = os.environ.get("HF_TOKEN")

# Collected examples are appended to this JSON Lines file inside "data/".
DATA_FILENAME = "data.jsonl"
DATA_FILE = os.path.join("data", DATA_FILENAME)

# Working copy of the dataset repo, cloned into the local "data" directory.
repo = Repository(
    local_dir="data",
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN,
)

# Now let's run the app!
# The model in the loop is whatever the "sentiment-analysis" pipeline loads
# by default; the Blocks container holds the whole UI.
pipe = pipeline("sentiment-analysis")
demo = gr.Blocks()
with demo:
# How many examples make up one HIT.
total_cnt = 2

# Hidden textbox whose only job is to carry a value from the browser to
# _predict (used for passing the assignmentId).
dummy = gr.Textbox(visible=False)

# HIT progress is tracked as a JSON document held in a hidden component and
# passed in and out of the event handlers.
state_dict = {
    "assignmentId": "",
    "cnt": 0,
    "cnt_fooled": 0,
    "data": [],
}
state = gr.JSON(state_dict, visible=False)

# Page header and the progress line shown to the user.
gr.Markdown("# DADC in Gradio example")
gr.Markdown("Try to fool the model and find an example where it predicts the wrong label!")
state_display = gr.Markdown(f"State: 0/{total_cnt} (0 fooled)")
# Generate model prediction
# Default model: distilbert-base-uncased-finetuned-sst-2-english
def _predict(txt, tgt, state, dummy):
    """Run the model on one submitted example and update the HIT state.

    Args:
        txt: The statement entered by the user.
        tgt: The target label selected by the user.
        state: HIT state dict with the keys "assignmentId", "cnt",
            "cnt_fooled" and "data"; it is updated in place.
        dummy: A query string with a leading character to drop (the click
            handler's JS passes ``window.location.search``).

    Returns:
        A 7-tuple matching the click handler's outputs: label confidences,
        the result message (markdown), the updated state, the visibility
        update for the per-example submit column, the visibility update for
        the final submit column, the progress markdown, and ``dummy``.
    """
    top = pipe(txt)[0]
    score = top["score"]
    predicted_key = top["label"].lower()
    # Two-class output: whatever was not predicted gets the remaining mass.
    complement_key = "positive" if predicted_key != "positive" else "negative"
    pred_confidences = {predicted_key: score, complement_key: 1 - score}

    # Title-case the model label so it is comparable with the radio choices.
    model_label = top["label"].title()
    fooled = model_label != tgt

    ret = f"Target: **{tgt}**. Model prediction: **{model_label}**\n\n"
    if not fooled:
        ret += " You did not fool the model! Too bad, try again!"
    else:
        state["cnt_fooled"] += 1
        ret += " You fooled the model! Well done!"

    state["cnt"] += 1
    hit_complete = state["cnt"] == total_cnt
    # Once the HIT is complete, swap the per-example button for the final one.
    toggle_final_submit = gr.update(visible=hit_complete)
    toggle_example_submit = gr.update(visible=not hit_complete)

    new_state_md = f"State: {state['cnt']}/{total_cnt} ({state['cnt_fooled']} fooled)"

    record = {
        "cnt": state["cnt"],
        "text": txt,
        "target": tgt,
        "model_pred": model_label,
        "fooled": fooled,
    }
    state["data"].append(record)

    # When the app is opened from mturk the query string carries an
    # assignmentId. It has to be captured here, before submit_hit_button is
    # clicked, so that the turker can get credit for their HIT.
    assignment_ids = parse_qs(dummy[1:]).get("assignmentId")
    if assignment_ids is not None:
        state["assignmentId"] = assignment_ids[0]

    return (
        pred_confidences,
        ret,
        state,
        toggle_example_submit,
        toggle_final_submit,
        new_state_md,
        dummy,
    )
# Input fields: the statement to classify and the label the user considers
# correct. The label order is shuffled once, when this code runs.
text_input = gr.Textbox(
    placeholder="Enter model-fooling statement",
    show_label=False,
)
labels = ["Positive", "Negative"]
random.shuffle(labels)
label_input = gr.Radio(
    choices=labels,
    label="Target (correct) label",
)

# Output fields: model confidences and the textual verdict.
label_output = gr.Label()
text_output = gr.Markdown()

# Two button containers. Only the per-example one is visible at first; the
# "Submit HIT" one starts hidden.
with gr.Column() as example_submit:
    submit_ex_button = gr.Button("Submit")
with gr.Column(visible=False) as final_submit:
    submit_hit_button = gr.Button("Submit HIT")
# Store the HIT data into a Hugging Face dataset.
# The HIT is also stored and logged on mturk when post_hit_js is run below.
# This _store_in_huggingface_dataset function just demonstrates how easy it is
# to automatically create a Hugging Face dataset from mturk.
def _store_in_huggingface_dataset(state):
    """Append the HIT's examples to the JSONL data file and push the repo.

    Each entry of ``state["data"]`` is written as one JSON line, with the
    HIT's ``assignmentId`` merged in as an extra field.

    Args:
        state: HIT state dict; the keys "assignmentId" and "data" (a list of
            per-example dicts) are read.

    Returns:
        ``state``, unchanged, so it can be routed back to the state output.

    Raises:
        KeyError: If ``state`` lacks "assignmentId" or "data".
    """
    # Serialize before touching the lock or the file, so that a malformed
    # state fails without creating or holding anything.
    records = [
        json.dumps({"assignmentId": state["assignmentId"], **datum})
        for datum in state["data"]
    ]
    if not records:
        # Nothing was collected. Writing would append a bare newline (a blank
        # JSONL record) and push a change with no data, so skip both.
        return state

    # Hold the file lock for the append and the push together, so that
    # overlapping submissions cannot interleave them. The context manager
    # releases the lock even if the write or the push raises.
    with FileLock(DATA_FILE + ".lock"):
        with open(DATA_FILE, "a", encoding="utf-8") as jsonlfile:
            jsonlfile.write("\n".join(records) + "\n")
        repo.push_to_hub()
    return state
# Button event handlers
# JS attached to the per-example submit via `_js`: it forwards the text,
# label and state inputs unchanged and replaces the hidden `dummy` value
# with the page's query string (window.location.search).
get_window_location_search_js = """
function(text_input, label_input, state, dummy) {
return [text_input, label_input, state, window.location.search];
}
"""
submit_ex_button.click(
    fn=_predict,
    inputs=[text_input, label_input, state, dummy],
    outputs=[
        label_output,
        text_output,
        state,
        example_submit,
        final_submit,
        state_display,
        dummy,
    ],
    _js=get_window_location_search_js,
)
# JS attached to the final "Submit HIT" button via `_js`. With a real
# assignmentId it posts every key of the state to mturk as a hidden form
# field; otherwise it reloads the page so another "fake" HIT can be entered.
# Object values (the "data" list of examples) are JSON-encoded, because
# assigning them to an input's value directly would coerce them to
# "[object Object],[object Object]" and lose the collected examples.
# NOTE(review): the form posts to the mturk *sandbox* endpoint; a live
# deployment presumably needs a different submit URL - confirm.
post_hit_js = """
function(state) {
if (state["assignmentId"] !== "" && state["assignmentId"] !== "ASSIGNMENT_ID_NOT_AVAILABLE"){
// If there is an assignmentId, then the submitter is on mturk
// and has accepted the HIT. So, we need to submit their HIT.
const form = document.createElement('form');
form.action = 'https://workersandbox.mturk.com/mturk/externalSubmit';
form.method = 'post';
for (const key in state) {
const hiddenField = document.createElement('input');
hiddenField.type = 'hidden';
hiddenField.name = key;
// Form fields are strings: JSON-encode arrays/objects so they survive.
hiddenField.value = (typeof state[key] === 'object' && state[key] !== null) ? JSON.stringify(state[key]) : state[key];
form.appendChild(hiddenField);
};
document.body.appendChild(form);
form.submit();
return state;
} else {
// If there is no assignmentId, then we assume that the submitter is
// on huggingface.co or there is an mturker doing the preview.
// This means that we can't log a HIT in mturk, but
// _store_in_huggingface_dataset will still store the example in
// our dataset without an assignmentId. The following line here
// loads the app again so the user can enter in another "fake" HIT.
window.location.href = window.location.href;
return state;
}
}
"""
submit_hit_button.click(
    _store_in_huggingface_dataset,
    inputs=[state],
    outputs=[state],
    _js=post_hit_js,
)
# Start the Gradio app.
demo.launch()