Spaces:
Running
Running
emilylearning
committed on
Commit
·
25dd383
1
Parent(s):
7c94469
update markdown
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
# Model card: https://huggingface.co/emilylearning/selection-induced-collider-bias
|
2 |
# %%
|
3 |
import gradio as gr
|
4 |
import matplotlib.pyplot as plt
|
@@ -17,7 +16,6 @@ MODEL_NAME_DICT = {
|
|
17 |
"bert-large-uncased": "BERT-large",
|
18 |
"roberta-base": "RoBERTa-base",
|
19 |
"bert-base-uncased": "BERT-base",
|
20 |
-
"olm/olm-roberta-base-oct-2022": "OLM_RoBERTa-base",
|
21 |
OWN_MODEL_NAME: "Your model's"
|
22 |
}
|
23 |
MODEL_NAMES = list(MODEL_NAME_DICT.keys())
|
@@ -51,10 +49,10 @@ GENDERED_LIST = [
|
|
51 |
]
|
52 |
|
53 |
|
54 |
-
|
55 |
# %%
|
56 |
# Fire up the models
|
57 |
-
models = {m
|
|
|
58 |
|
59 |
# %%
|
60 |
# Get the winogender sentences
|
@@ -63,7 +61,9 @@ occs = sorted(list({sentence_id.split('_')[0]
|
|
63 |
for sentence_id in winogender_sentences}))
|
64 |
|
65 |
# %%
|
66 |
-
|
|
|
|
|
67 |
male_gendered_tokens = [list[0] for list in GENDERED_LIST]
|
68 |
female_gendered_tokens = [list[1] for list in GENDERED_LIST]
|
69 |
|
@@ -109,8 +109,9 @@ def get_figure(df, model_name, occ):
|
|
109 |
ax.bar(xs, ys)
|
110 |
ax.axis('tight')
|
111 |
ax.set_xlabel("Sentence number")
|
112 |
-
ax.set_ylabel("
|
113 |
-
ax.set_title(
|
|
|
114 |
return fig
|
115 |
|
116 |
|
@@ -121,7 +122,7 @@ def predict_gender_pronouns(
|
|
121 |
texts,
|
122 |
occ,
|
123 |
):
|
124 |
-
"""Run inference on input_text for selected model type, returning
|
125 |
"""
|
126 |
|
127 |
# TODO: make these selectable by user
|
@@ -130,8 +131,8 @@ def predict_gender_pronouns(
|
|
130 |
|
131 |
# For debugging
|
132 |
print('input_texts', texts)
|
133 |
-
|
134 |
-
if model_name is None or model_name == '':
|
135 |
model_name = MODEL_NAMES[0]
|
136 |
model = models[model_name]
|
137 |
elif model_name == OWN_MODEL_NAME:
|
@@ -143,7 +144,7 @@ def predict_gender_pronouns(
|
|
143 |
|
144 |
indie_vars_list = indie_vars.split(',')
|
145 |
|
146 |
-
male_gendered_tokens, female_gendered_tokens =
|
147 |
|
148 |
masked_texts = [text.replace('MASK', mask_token) for text in texts]
|
149 |
|
@@ -193,7 +194,7 @@ def predict_gender_pronouns(
|
|
193 |
/ num_ave), DECIMAL_PLACES)
|
194 |
|
195 |
uncertain_df = pd.DataFrame.from_dict(
|
196 |
-
all_uncertainty_f, orient='index', columns=['
|
197 |
|
198 |
uncertain_df = uncertain_df.reset_index().rename(
|
199 |
columns={'index': 'Sentence number'})
|
@@ -208,28 +209,26 @@ def predict_gender_pronouns(
|
|
208 |
demo = gr.Blocks()
|
209 |
with demo:
|
210 |
input_texts = gr.Variable([])
|
211 |
-
gr.Markdown("
|
212 |
-
|
213 |
-
|
214 |
-
gr.Markdown("
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
gr.Markdown("#### TL;DR")
|
227 |
-
gr.Markdown("Follow steps below to test out one of the pre-loaded options. Once you get the hang of it, you can load a new model and/or provide your own input texts.")
|
228 |
|
229 |
with gr.Row():
|
230 |
model_name = gr.Radio(
|
231 |
MODEL_NAMES,
|
232 |
-
|
|
|
233 |
)
|
234 |
own_model_name = gr.Textbox(
|
235 |
label=f"...Or, if you selected an '{OWN_MODEL_NAME}' model, put any Hugging Face pipeline model name \
|
@@ -246,11 +245,11 @@ with demo:
|
|
246 |
lines=2,
|
247 |
label=f"...Or, if you selected '{PICK_YOUR_OWN_LABEL}' above, add your own texts new-line delimited sentences here. Be sure\
|
248 |
to include a single MASK-ed out pronoun. \
|
249 |
-
If unsure on the required format, click an occupation above instead, to see some example input texts for this round."
|
250 |
)
|
251 |
|
252 |
with gr.Row():
|
253 |
-
get_text_btn = gr.Button("3)
|
254 |
|
255 |
get_text_btn.click(
|
256 |
fn=display_input_texts,
|
@@ -261,16 +260,17 @@ with demo:
|
|
261 |
)
|
262 |
|
263 |
with gr.Row():
|
264 |
-
uncertain_btn = gr.Button("4)
|
265 |
gr.Markdown(
|
266 |
"If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.")
|
267 |
|
268 |
with gr.Row():
|
269 |
-
female_fig = gr.Plot()
|
270 |
with gr.Row():
|
271 |
female_df = gr.Dataframe()
|
272 |
with gr.Row():
|
273 |
-
display_text = gr.Textbox(
|
|
|
274 |
|
275 |
uncertain_btn.click(
|
276 |
fn=predict_gender_pronouns,
|
@@ -281,4 +281,4 @@ with demo:
|
|
281 |
|
282 |
demo.launch(debug=True)
|
283 |
|
284 |
-
# %%
|
|
|
|
|
1 |
# %%
|
2 |
import gradio as gr
|
3 |
import matplotlib.pyplot as plt
|
|
|
16 |
"bert-large-uncased": "BERT-large",
|
17 |
"roberta-base": "RoBERTa-base",
|
18 |
"bert-base-uncased": "BERT-base",
|
|
|
19 |
OWN_MODEL_NAME: "Your model's"
|
20 |
}
|
21 |
MODEL_NAMES = list(MODEL_NAME_DICT.keys())
|
|
|
49 |
]
|
50 |
|
51 |
|
|
|
52 |
# %%
|
53 |
# Fire up the models
|
54 |
+
models = {m: pipeline("fill-mask", model=m)
|
55 |
+
for m in MODEL_NAMES if m != OWN_MODEL_NAME}
|
56 |
|
57 |
# %%
|
58 |
# Get the winogender sentences
|
|
|
61 |
for sentence_id in winogender_sentences}))
|
62 |
|
63 |
# %%
|
64 |
+
|
65 |
+
|
66 |
+
def get_gendered_token_ids():
|
67 |
male_gendered_tokens = [list[0] for list in GENDERED_LIST]
|
68 |
female_gendered_tokens = [list[1] for list in GENDERED_LIST]
|
69 |
|
|
|
109 |
ax.bar(xs, ys)
|
110 |
ax.axis('tight')
|
111 |
ax.set_xlabel("Sentence number")
|
112 |
+
ax.set_ylabel("Specification Metric")
|
113 |
+
ax.set_title(
|
114 |
+
f"Task Specification Metric on {MODEL_NAME_DICT[model_name]} for '{occ}' sentences")
|
115 |
return fig
|
116 |
|
117 |
|
|
|
122 |
texts,
|
123 |
occ,
|
124 |
):
|
125 |
+
"""Run inference on input_text for selected model type, returning Task Specification metric results.
|
126 |
"""
|
127 |
|
128 |
# TODO: make these selectable by user
|
|
|
131 |
|
132 |
# For debugging
|
133 |
print('input_texts', texts)
|
134 |
+
|
135 |
+
if model_name is None or model_name == '':
|
136 |
model_name = MODEL_NAMES[0]
|
137 |
model = models[model_name]
|
138 |
elif model_name == OWN_MODEL_NAME:
|
|
|
144 |
|
145 |
indie_vars_list = indie_vars.split(',')
|
146 |
|
147 |
+
male_gendered_tokens, female_gendered_tokens = get_gendered_token_ids()
|
148 |
|
149 |
masked_texts = [text.replace('MASK', mask_token) for text in texts]
|
150 |
|
|
|
194 |
/ num_ave), DECIMAL_PLACES)
|
195 |
|
196 |
uncertain_df = pd.DataFrame.from_dict(
|
197 |
+
all_uncertainty_f, orient='index', columns=['Specification Metric'])
|
198 |
|
199 |
uncertain_df = uncertain_df.reset_index().rename(
|
200 |
columns={'index': 'Sentence number'})
|
|
|
209 |
demo = gr.Blocks()
|
210 |
with demo:
|
211 |
input_texts = gr.Variable([])
|
212 |
+
gr.Markdown("**Detect Task Specification at Inference-time.**")
|
213 |
+
|
214 |
+
gr.Markdown("**Follow the numbered steps below to test one of the pre-loaded options.** Once you get the hang of it, you can load a new model and/or provide your own input texts.")
|
215 |
+
gr.Markdown(f"""1) Pick a preloaded BERT-like model.
|
216 |
+
*Note: RoBERTa-large performance is best.*
|
217 |
+
2) Pick an Occupation type from the Winogender Schemas evaluation set.
|
218 |
+
*Or select '{PICK_YOUR_OWN_LABEL}' (it need not be about an occupation).*
|
219 |
+
3) Click button to load input texts.
|
220 |
+
*Read the sentences to determine which two are well-specified for gendered pronoun coreference resolution. The rest are gender-unspecified.*
|
221 |
+
4) Click button to get Task Specification Metric results!
|
222 |
+
""")
|
223 |
+
|
224 |
+
|
225 |
+
|
|
|
|
|
|
|
226 |
|
227 |
with gr.Row():
|
228 |
model_name = gr.Radio(
|
229 |
MODEL_NAMES,
|
230 |
+
type="value",
|
231 |
+
label="1) Pick a preloaded BERT-like model (note: RoBERTa-large performance is best).",
|
232 |
)
|
233 |
own_model_name = gr.Textbox(
|
234 |
label=f"...Or, if you selected an '{OWN_MODEL_NAME}' model, put any Hugging Face pipeline model name \
|
|
|
245 |
lines=2,
|
246 |
label=f"...Or, if you selected '{PICK_YOUR_OWN_LABEL}' above, add your own texts new-line delimited sentences here. Be sure\
|
247 |
to include a single MASK-ed out pronoun. \
|
248 |
+
If unsure on the required format, click an occupation above instead, to see some example input texts for this round."
|
249 |
)
|
250 |
|
251 |
with gr.Row():
|
252 |
+
get_text_btn = gr.Button("3) Click to load input texts.)")
|
253 |
|
254 |
get_text_btn.click(
|
255 |
fn=display_input_texts,
|
|
|
260 |
)
|
261 |
|
262 |
with gr.Row():
|
263 |
+
uncertain_btn = gr.Button("4) Click to get Task Specification Metric results!")
|
264 |
gr.Markdown(
|
265 |
"If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.")
|
266 |
|
267 |
with gr.Row():
|
268 |
+
female_fig = gr.Plot(type="auto")
|
269 |
with gr.Row():
|
270 |
female_df = gr.Dataframe()
|
271 |
with gr.Row():
|
272 |
+
display_text = gr.Textbox(
|
273 |
+
type="auto", label="Sample of text fed to model")
|
274 |
|
275 |
uncertain_btn.click(
|
276 |
fn=predict_gender_pronouns,
|
|
|
281 |
|
282 |
demo.launch(debug=True)
|
283 |
|
284 |
+
# %%
|