import gradio as gr
import pandas as pd
import numpy as np
import string
import re
import json
import random
import torch
import hashlib, base64
from tqdm import tqdm
from gradio.themes.base import Base
import openai

# error messages
from error_messages import *

tqdm().pandas()

# bias testing manager
import mgr_bias_scoring as bt_mgr

# managers for sentences and biases
import mgr_requests as rq_mgr
import mgr_biases as bmgr

use_paper_sentences = False

G_NUM_SENTENCES = 0
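
# Build the bias specification dictionary from the four comma-separated term lists
# entered in the GUI. For example, group1="brother, father" and att1="science, technology"
# yield {"social_groups": {"group 1": ["brother", "father"], ...},
#        "attributes":    {"attribute 1": ["science", "technology"], ...}}.
# Empty entries (e.g., from trailing commas) are dropped.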
def getTermsFromGUI(group1, group2, att1, att2):
    bias_spec = {
        "social_groups": {
            "group 1": [t.strip(" ") for t in group1.split(",") if len(t.strip(' ')) > 0],
            "group 2": [t.strip(" ") for t in group2.split(",") if len(t.strip(' ')) > 0]},
        "attributes": {
            "attribute 1": [t.strip(" ") for t in att1.split(",") if len(t.strip(' ')) > 0],
            "attribute 2": [t.strip(" ") for t in att2.split(",") if len(t.strip(' ')) > 0]}
    }
    return bias_spec
# Select from example datasets
def prefillBiasSpec(evt: gr.SelectData):
    global use_paper_sentences

    print(f"Selected {evt.value} at {evt.index} from {evt.target}")
    #bias_filename = f"{evt.value[1]}.json"
    bias_filename = f"{bmgr.bias2tag[evt.value]}.json"
    print(f"Filename: {bias_filename}")

    bias_spec = bmgr.loadPredefinedBiasSpec(bias_filename)

    grp1_terms, grp2_terms = bmgr.getSocialGroupTerms(bias_spec)
    att1_terms, att2_terms = bmgr.getAttributeTerms(bias_spec)

    print(f"Grp 1: {grp1_terms}")
    print(f"Grp 2: {grp2_terms}")
    print(f"Att 1: {att1_terms}")
    print(f"Att 2: {att2_terms}")

    #use_paper_sentences = True

    return (', '.join(grp1_terms[0:50]), ', '.join(grp2_terms[0:50]), ', '.join(att1_terms[0:50]), ', '.join(att2_terms[0:50]))


def updateErrorMsg(isError, text):
    return gr.Markdown.update(visible=isError, value=text)
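
# Generate fresh test sentences for the current bias specification via the online
# generator (ChatGPT through rq_mgr._generateOnline), after validating the entered
# terms and the OpenAI key. Returns a tuple of Gradio component updates: error
# message, coverage info, visibility of the generation row and model dropdown,
# the sentence accordion and DataFrame, and the generate/test button visibility.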
def generateSentences(gr1, gr2, att1, att2, openai_key, num_sent2gen, progress=gr.Progress()):
    global use_paper_sentences, G_NUM_SENTENCES
    print(f"GENERATE SENTENCES CLICKED!, requested sentence number: {num_sent2gen}")

    # No error messages by default
    err_update = updateErrorMsg(False, "")
    bias_gen_states = [True, False]
    online_gen_visible = True
    info_msg_update = gr.Markdown.update(visible=False, value="")

    test_sentences = []
    bias_spec = getTermsFromGUI(gr1, gr2, att1, att2)
    g1, g2, a1, a2 = bt_mgr.get_words(bias_spec)
    total_att_terms = len(a1) + len(a2)
    all_terms_len = len(g1) + len(g2) + len(a1) + len(a2)
    print(f"Length of all the terms: {all_terms_len}")
    if all_terms_len == 0:
        print("No terms entered!")
        err_update = updateErrorMsg(True, NO_TERMS_ENTERED_ERROR)
        #raise gr.Error(NO_TERMS_ENTERED_ERROR)
    else:
        if len(openai_key) == 0:
            print("Empty OpenAI key!!!")
            err_update = updateErrorMsg(True, OPENAI_KEY_EMPTY)
        elif len(openai_key) < 10:
            print("Wrong length OpenAI key!!!")
            err_update = updateErrorMsg(True, OPENAI_KEY_WRONG)
        else:
            progress(0, desc="ChatGPT generation...")
            print(f"Using Online Generator LLM...")

            test_sentences = rq_mgr._generateOnline(bias_spec, progress, openai_key, False)
            #print(f"Test sentences: {test_sentences}")
            num_sentences = len(test_sentences)
            print(f"Returned num sentences: {num_sentences}")

            G_NUM_SENTENCES = num_sentences
            if G_NUM_SENTENCES == 0:
                print("Test sentences empty!")
                #raise gr.Error(NO_SENTENCES_ERROR)
                err_update = updateErrorMsg(True, NO_SENTENCES_ERROR)
            else:
                # has all sentences, can bias test
                bias_gen_states = [False, True]
                online_gen_visible = False
                info_msg = _genSentenceCoverMsg(test_sentences, total_att_terms, isGen=True)
                info_msg_update = gr.Markdown.update(visible=True, value=info_msg)

    print(f"Online gen visible: {not err_update['visible']}")
    return (err_update,  # err message if any
            info_msg_update,  # info message about the number of sentences and coverage
            gr.Row.update(visible=online_gen_visible),  # online gen row
            #gr.Slider.update(minimum=8, maximum=24, value=4), # slider generation
            gr.Dropdown.update(visible=not online_gen_visible),  # tested model selection dropdown
            gr.Accordion.update(visible=not online_gen_visible, label=f"Test sentences ({len(test_sentences)})"),  # accordion
            gr.update(visible=True),  # Row sentences
            gr.DataFrame.update(value=test_sentences),  # DataFrame test sentences
            gr.update(visible=bias_gen_states[0]),  # gen btn
            gr.update(visible=bias_gen_states[1])  # bias btn
            )
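
# Toggle the online-generation controls (OpenAI key box, button label, sentence-count
# slider) when the "generate new sentences" option is switched on or off.
# Note: the checkbox that used to call this handler is currently commented out below.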
def useOnlineGen(value):
    if value == True:
        btn_label = "Generate New Sentences"
    else:
        btn_label = "Use Saved Sentences"

    return (gr.update(visible=value),  # OpenAI key TextBox
            gr.update(value=btn_label),  # Generate button
            gr.update(visible=value)  # Slider
            )

# Interaction with top tabs
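# Each moveStepN handler highlights one breadcrumb button (variant="primary") and
# shows only the matching step column (tab1/tab2/tab3); the other two stay hidden.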
def moveStep1():
    variants = ["primary", "secondary", "secondary"]
    #inter = [True, False, False]
    tabs = [True, False, False]

    return (gr.update(variant=variants[0]),
            gr.update(variant=variants[1]),
            gr.update(variant=variants[2]),
            gr.update(visible=tabs[0]),
            gr.update(visible=tabs[1]),
            gr.update(visible=tabs[2]))


def moveStep2():
    variants = ["secondary", "primary", "secondary"]
    #inter = [True, True, False]
    tabs = [False, True, False]

    return (gr.update(variant=variants[0]),
            gr.update(variant=variants[1]),
            gr.update(variant=variants[2]),
            gr.update(visible=tabs[0]),
            gr.update(visible=tabs[1]),
            gr.update(visible=tabs[2]))


def moveStep3():
    variants = ["secondary", "secondary", "primary"]
    #inter = [True, True, False]
    tabs = [False, False, True]

    return (gr.update(variant=variants[0]),
            gr.update(variant=variants[1]),
            gr.update(variant=variants[2]),
            gr.update(visible=tabs[0]),
            gr.update(visible=tabs[1]),
            gr.update(visible=tabs[2]))
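
# Compose the coverage message shown above the retrieved/generated sentences:
# how many sentences were found (or generated, when isGen=True) and how many of
# the attribute terms from the bias specification they cover.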
def _genSentenceCoverMsg(test_sentences, total_att_terms, isGen=False):
    att_cover_dict = {}
    for grp, att, sent in test_sentences:
        num = att_cover_dict.get(att, 0)
        att_cover_dict[att] = num + 1
    att_by_count = dict(sorted(att_cover_dict.items(), key=lambda item: item[1]))
    num_covered_atts = len(list(att_by_count.keys()))
    least_covered_att = list(att_by_count.keys())[0]
    least_covered_count = att_by_count[least_covered_att]

    source_msg = "Generated" if isGen else "Found"
    if num_covered_atts >= total_att_terms:
        info_msg = f"**{source_msg} {len(test_sentences)} sentences covering all bias specification attributes. Please select model to test.**"
    else:
        info_msg = f"**{source_msg} {len(test_sentences)} sentences covering {num_covered_atts} of {total_att_terms} attributes. Please select model to test.**"

    return info_msg
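
# Fetch previously saved test sentences for the current bias specification
# (rq_mgr._getSavedSentences) and advance the UI to Step 2. If no terms were
# entered, or no saved sentences exist yet, an error message is shown and the
# OpenAI generation row stays visible so new sentences can be generated instead.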
def retrieveSentences(gr1, gr2, att1, att2, progress=gr.Progress()):
    global use_paper_sentences, G_NUM_SENTENCES
    print("RETRIEVE SENTENCES CLICKED!")

    variants = ["secondary", "primary", "secondary"]
    inter = [True, True, False]
    tabs = [True, False]
    bias_gen_states = [True, False]
    prog_vis = [True]
    info_msg_update = gr.Markdown.update(visible=False, value="")
    openai_gen_row_update = gr.Row.update(visible=True)
    tested_model_dropdown_update = gr.Dropdown.update(visible=False)

    test_sentences = []
    bias_spec = getTermsFromGUI(gr1, gr2, att1, att2)
    g1, g2, a1, a2 = bt_mgr.get_words(bias_spec)
    total_att_terms = len(a1) + len(a2)
    all_terms_len = len(g1) + len(g2) + len(a1) + len(a2)
    print(f"Length of all the terms: {all_terms_len}")
    if all_terms_len == 0:
        print("No terms entered!")
        err_update = updateErrorMsg(True, NO_TERMS_ENTERED_ERROR)
        variants = ["primary", "secondary", "secondary"]
        inter = [True, False, False]
        tabs = [True, False]
        prog_vis = [False]
        #raise gr.Error(NO_TERMS_ENTERED_ERROR)
    else:
        tabs = [False, True]
        progress(0, desc="Fetching saved sentences...")
        test_sentences = rq_mgr._getSavedSentences(bias_spec, progress, use_paper_sentences)
        #err_update, _, test_sentences = generateSentences(gr1, gr2, att1, att2, progress)
        print(f"Type: {type(test_sentences)}")
        num_sentences = len(test_sentences)
        print(f"Returned num sentences: {num_sentences}")

        err_update = updateErrorMsg(False, "")
        G_NUM_SENTENCES = num_sentences
        if G_NUM_SENTENCES == 0:
            print("Test sentences empty!")
            #raise gr.Error(NO_SENTENCES_ERROR)
            err_update = updateErrorMsg(True, NO_SENTENCES_ERROR)

        if len(test_sentences) > 0:
            info_msg = _genSentenceCoverMsg(test_sentences, total_att_terms)
            info_msg_update = gr.Markdown.update(visible=True, value=info_msg)

            print(f"Got {len(test_sentences)}, allowing bias test...")
            print(test_sentences)
            bias_gen_states = [False, True]
            openai_gen_row_update = gr.Row.update(visible=False)
            tested_model_dropdown_update = gr.Dropdown.update(visible=True)

    return (err_update,  # error message
            openai_gen_row_update,  # OpenAI generation
            tested_model_dropdown_update,  # Tested Model Dropdown
            info_msg_update,  # sentences retrieved info update
            gr.update(visible=prog_vis[0]),  # progress bar top
            gr.update(variant=variants[0], interactive=inter[0]),  # breadcrumb btn1
            gr.update(variant=variants[1], interactive=inter[1]),  # breadcrumb btn2
            gr.update(variant=variants[2], interactive=inter[2]),  # breadcrumb btn3
            gr.update(visible=tabs[0]),  # tab 1
            gr.update(visible=tabs[1]),  # tab 2
            gr.Accordion.update(visible=bias_gen_states[1], label=f"Test sentences ({len(test_sentences)})"),  # accordion
            gr.update(visible=True),  # Row sentences
            gr.DataFrame.update(value=test_sentences),  # DataFrame test sentences
            gr.update(visible=bias_gen_states[0]),  # gen btn
            gr.update(visible=bias_gen_states[1]),  # bias btn
            gr.update(value=', '.join(g1)),  # gr1_fixed
            gr.update(value=', '.join(g2)),  # gr2_fixed
            gr.update(value=', '.join(a1)),  # att1_fixed
            gr.update(value=', '.join(a2))  # att2_fixed
            )
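
# Render a Bloomberg-style block visualization for one attribute term: one small
# square per test-sentence pair, darker (#555) for stereotyped choices and lighter
# (#999) for non-stereotyped ones (the colors swap when flip=True), with the
# sentence template shown in a hover tooltip on the left or right (onRight).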
def bloombergViz(att, val, numblocks, score_templates_df, onRight=False, flip=False):
    # percent = round(val * 100)
    # percentStr = f"{percent}"
    # filled = "<div style='height:20px;width:20px;background-color:#555;display:inline-block'><span class='tooltiptext' style='color:#FFF'>{}</span></div> "
    # unfilled = "<div style='height:20px;width:20px;background-color:#999;display:inline-block'><span class='tooltiptext' style='color:#FFF'>{}</span></div> "
    # numFilled = round((percent/100) * numblocks)
    # numUnFilled = numblocks - numFilled

    leftColor = "#555"
    rightColor = "#999"
    if flip:
        leftColor = "#999"
        rightColor = "#555"

    res = ""
    spanClass = "tooltiptext_left"
    if onRight:
        spanClass = "tooltiptext_right"

    dfy = score_templates_df.loc[(score_templates_df['Attribute'] == att) & (score_templates_df['Stereotyped'] == 'yes')]
    dfn = score_templates_df.loc[(score_templates_df['Attribute'] == att) & (score_templates_df['Stereotyped'] == 'no')]
    print("dfy", dfy)
    print("dfn", dfn)

    for i in range(len(dfy.index)):
        sentence = dfy.iloc[i, 3]
        disp = sentence.replace("[MASK]", "[SOCIAL GROUP TERM]", 1)
        res += f"<div style='height:20px;width:20px;background-color:{leftColor};display:inline-block;position:relative' id='filled'><span class='{spanClass}' style='color:#FFF'>{disp}</span></div> "
    for i in range(len(dfn.index)):
        sentence = dfn.iloc[i, 3]
        disp = sentence.replace("[MASK]", "[SOCIAL GROUP TERM]", 1)
        res += f"<div style='height:20px;width:20px;background-color:{rightColor};display:inline-block;position:relative' id='empty'><span class='{spanClass}' style='color:#FFF'>{disp}</span></div> "

    return res
    # if flip:
    #     return numFilled * unfilled + numUnFilled * filled;
    # return numFilled * filled + numUnFilled * unfilled

def att_bloombergViz(att, val, numblocks, score_templates_df, onRight=False, flip=False):
    viz = bloombergViz(att, val, numblocks, score_templates_df, onRight, flip)
    attHTML = f"<div style='border-style:solid;border-color:#999;border-radius:12px'>{att}: {round(val*100)}%<br>{viz}</div><br>"
    return attHTML
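
# Run the full bias test for the selected model: build the bias specification,
# turn the test sentences into masked templates and stereotype/anti-stereotype
# pairs (bt_mgr.convert2pairs), score the pairs with the chosen Hugging Face model,
# and return updates for the Step 3 results view (overall bias label, per-attribute
# visualizations, per-sentence results table, and interpretation text).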
def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=gr.Progress()):
    global G_NUM_SENTENCES

    variants = ["secondary", "secondary", "primary"]
    inter = [True, True, True]
    tabs = [False, False, True]

    if test_sentences_df.shape[0] == 0:
        G_NUM_SENTENCES = 0
        raise gr.Error(NO_SENTENCES_ERROR)

    progress(0, desc="Starting social bias testing...")

    print(f"Type: {type(test_sentences_df)}")
    print(f"Data: {test_sentences_df}")

    # count how many test sentences use each attribute term
    att_freqs = {}
    for att in test_sentences_df["Attribute term"].tolist():
        if att in att_freqs:
            att_freqs[att] += 1
        else:
            att_freqs[att] = 1
    print(f"att_freqs: {att_freqs}")

    # 1. bias specification
    bias_spec = getTermsFromGUI(gr1, gr2, att1, att2)
    print(f"Bias spec dict: {bias_spec}")
    g1, g2, a1, a2 = bt_mgr.get_words(bias_spec)
    attributes_g1 = bias_spec['attributes']['attribute 1']
    attributes_g2 = bias_spec['attributes']['attribute 2']

    # 2. convert to templates
    test_sentences_df['Template'] = test_sentences_df.apply(bt_mgr.sentence_to_template, axis=1)
    print(f"Data with template: {test_sentences_df}")

    # 3. convert to pairs
    test_pairs_df = bt_mgr.convert2pairs(bias_spec, test_sentences_df)
    print(f"Test pairs: {test_pairs_df.head(3)}")

    progress(0.05, desc=f"Loading model {model_name}...")

    # 4. get the per sentence bias scores
    print(f"Test model name: {model_name}")
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")
    tested_model, tested_tokenizer = bt_mgr._getModel(model_name, device)
    #print(f"Mask token id: {tested_tokenizer.mask_token_id}")

    # sanity check bias test
    bt_mgr.testModelProbability(model_name, tested_model, tested_tokenizer, device)

    # testing actual sentences
    test_score_df, bias_stats_dict = bt_mgr.testBiasOnPairs(test_pairs_df, bias_spec, model_name, tested_model, tested_tokenizer, device, progress)
    print(f"Test scores: {test_score_df.head(3)}")
    model_bias_dict = {}
    model_bias_dict[bias_stats_dict['tested_model']] = bias_stats_dict['model_bias']

    per_attrib_bias = bias_stats_dict['per_attribute']
    attrib_by_score = dict(sorted(per_attrib_bias.items(), key=lambda item: item[1], reverse=True))
    # model_bias_HTML = bloombergViz(bias_stats_dict['model_bias'])
    print(f"attrib_by_score: {attrib_by_score}")

    # bias score per sentence pair
    #test_pairs_df['bias_score'] = 0
    test_pairs_df.loc[test_pairs_df['stereotyped'] == 1, 'bias_score'] = test_pairs_df['top_logit'] - test_pairs_df['bottom_logit']
    test_pairs_df.loc[test_pairs_df['stereotyped'] == 0, 'bias_score'] = test_pairs_df['bottom_logit'] - test_pairs_df['top_logit']

    test_pairs_df['groups_rel'] = test_pairs_df['att_term_1'] + "/" + test_pairs_df['att_term_2']

    test_pairs_df['stereotyped_b'] = "Unknown"
    test_pairs_df.loc[test_pairs_df['stereotyped'] == 1, 'stereotyped_b'] = "yes"
    test_pairs_df.loc[test_pairs_df['stereotyped'] == 0, 'stereotyped_b'] = "no"

    # construct display dataframe
    score_templates_df = test_pairs_df[['group_term', 'template']].copy()
    score_templates_df['Groups'] = test_pairs_df['groups_rel']
    #score_templates_df['Bias Score'] = np.round(test_pairs_df['bias_score'],2)
    score_templates_df['Stereotyped'] = test_pairs_df['stereotyped_b']
    score_templates_df = score_templates_df.rename(columns={'group_term': "Attribute",
                                                            "template": "Template"})
    #'Bias Score'
    score_templates_df = score_templates_df[['Stereotyped', 'Attribute', 'Groups', 'Template']]
    num_sentences = score_templates_df.shape[0]
    print(f"score_templates_df: {score_templates_df}")

    # per-attribute visualizations, up to 8 attributes per group
    per_attrib_bias_HTML_stereo = ""
    num_atts = 0
    for att, score in attrib_by_score.items():
        if att in attributes_g1:
            per_attrib_bias_HTML_stereo += att_bloombergViz(att, score, att_freqs[att], score_templates_df, False, False)
            num_atts += 1
            if num_atts >= 8:
                break

    per_attrib_bias_HTML_antistereo = ""
    num_atts = 0
    for att, score in attrib_by_score.items():
        if att in attributes_g2:
            per_attrib_bias_HTML_antistereo += att_bloombergViz(att, score, att_freqs[att], score_templates_df, True, True)
            num_atts += 1
            if num_atts >= 8:
                break

    interpret_msg = bt_mgr._constructInterpretationMsg(bias_spec, num_sentences,
                                                       model_name, bias_stats_dict, per_attrib_bias,
                                                       score_templates_df
                                                       )

    return (gr.Markdown.update(visible=True),  # bar progress
            gr.Button.update(variant=variants[0], interactive=inter[0]),  # top breadcrumb button 1
            gr.Button.update(variant=variants[1], interactive=inter[1]),  # top breadcrumb button 2
            gr.Button.update(variant=variants[2], interactive=inter[2]),  # top breadcrumb button 3
            gr.update(visible=tabs[0]),  # content tab/column 1
            gr.update(visible=tabs[1]),  # content tab/column 2
            gr.update(visible=tabs[2]),  # content tab/column 3
            model_bias_dict,  # per model bias score
            gr.update(value=per_attrib_bias_HTML_stereo),  # per attribute bias score stereotyped
            gr.update(value=per_attrib_bias_HTML_antistereo),  # per attribute bias score antistereotyped
            gr.update(value=score_templates_df, visible=True),  # Pairs with scores
            gr.update(value=interpret_msg, visible=True),  # Interpretation message
            gr.update(value=', '.join(g1)),  # gr1_fixed
            gr.update(value=', '.join(g2)),  # gr2_fixed
            gr.update(value=', '.join(a1)),  # att1_fixed
            gr.update(value=', '.join(a2))  # att2_fixed
            )

# Selecting an attribute label in the label component
def selectAttributeLabel(evt: gr.SelectData):
    print(f"Selected {evt.value} at {evt.index} from {evt.target}")

    return ()

theme = gr.themes.Soft().set(
    button_small_radius='*radius_xxs',
    background_fill_primary='*neutral_50',
    border_color_primary='*primary_50'
)

soft = gr.themes.Soft(
    primary_hue="slate",
    spacing_size="sm",
    radius_size="md"
).set(
    # body_background_fill="white",
    button_primary_background_fill='*primary_400'
)

css_adds = "#group_row {background: white; border-color: white;} \
            #attribute_row {background: white; border-color: white;} \
            #tested_model_row {background: white; border-color: white;} \
            #button_row {background: white; border-color: white;} \
            #examples_elem .label {display: none}\
            #att1_words {border-color: white;} \
            #att2_words {border-color: white;} \
            #group1_words {border-color: white;} \
            #group2_words {border-color: white;} \
            #tested_model_drop {border-color: white;} \
            #gen_model_check {border-color: white;} \
            #gen_model_check .wrap {border-color: white;} \
            #gen_model_check .form {border-color: white;} \
            #open_ai_key_box {border-color: white;} \
            #gen_col {border-color: white;} \
            #gen_col .form {border-color: white;} \
            #res_label {background-color: #F8FAFC;} \
            #per_attrib_label_elem {background-color: #F8FAFC;} \
            #accordion {border-color: #E5E7EB} \
            #err_msg_elem p {color: #FF0000; cursor: pointer} \
            .tooltiptext_left {visibility: hidden;max-width:50ch;min-width:25ch;top: 100%;left: 0%;background-color: #222;text-align: center;border-radius: 6px;padding: 5px 0;position: absolute;z-index: 1;} \
            .tooltiptext_right {visibility: hidden;max-width:50ch;min-width:25ch;top: 100%;right: 0%;background-color: #222;text-align: center;border-radius: 6px;padding: 5px 0;position: absolute;z-index: 1;} \
            #filled:hover .tooltiptext_left {visibility: visible;} \
            #empty:hover .tooltiptext_left {visibility: visible;} \
            #filled:hover .tooltiptext_right {visibility: visible;} \
            #empty:hover .tooltiptext_right {visibility: visible;}"

#'bethecloud/storj_theme'
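
# UI layout: a three-step flow (bias specification -> test sentences -> bias test
# results) with breadcrumb buttons at the top; each step lives in its own column
# (tab1/tab2/tab3) whose visibility is toggled by the handlers defined above.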
with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
               css=css_adds) as iface:
    with gr.Row():
        with gr.Group():
            s1_btn = gr.Button(value="Step 1: Bias Specification", variant="primary", visible=True, interactive=True).style(size='sm')
            s2_btn = gr.Button(value="Step 2: Test Sentences", variant="secondary", visible=True, interactive=False).style(size='sm')
            s3_btn = gr.Button(value="Step 3: Bias Testing", variant="secondary", visible=True, interactive=False).style(size='sm')

    err_message = gr.Markdown("", visible=False, elem_id="err_msg_elem")
    bar_progress = gr.Markdown("   ")

    # Page 1
    with gr.Column(visible=True) as tab1:
        with gr.Column():
            gr.Markdown("### Social Bias Specification")
            gr.Markdown("Use one of the predefined specifications or enter your own terms for the social groups and attributes.")
            with gr.Row():
                example_biases = gr.Dropdown(
                    value="Select a predefined bias to test",
                    allow_custom_value=False,
                    interactive=True,
                    choices=["Male/Female <> Professions",
                             "Male/Female <> Math/Art",
                             "Male/Female <> Science/Art",
                             "Male/Female <> Career/Family",
                             "Eur.-American/Afr.-American <> Pleasant/Unpleasant #3",
                             "African-Female/European-Male <> Intersectional",
                             "African-Female/European-Male <> Emergent",
                             "Mexican-Female/European-Male <> Intersectional",
                             "Mexican-Female/European-Male <> Emergent",
                             "Mental/Physical Disease <> Temporary/Permanent",
                             "Young/Old Name <> Pleasant/Unpleasant"
                             ], label="Example Biases", #info="Select a predefined bias specification to fill out the terms below."
                )
                #bias_img = gr.HTML(value="<img src='https://docs.streamlit.io/logo.svg'>Bias test result saved! </img>",
                #                   visible=True)

            with gr.Row(elem_id="group_row"):
                group1 = gr.Textbox(label="Social Group 1", max_lines=1, elem_id="group1_words", elem_classes="input_words", placeholder="brother, father")
                group2 = gr.Textbox(label='Social Group 2', max_lines=1, elem_id="group2_words", elem_classes="input_words", placeholder="sister, mother")
            with gr.Row(elem_id="attribute_row"):
                att1 = gr.Textbox(label='Stereotype for Group 1', max_lines=1, elem_id="att1_words", elem_classes="input_words", placeholder="science, technology")
                att2 = gr.Textbox(label='Anti-stereotype for Group 1', max_lines=1, elem_id="att2_words", elem_classes="input_words", placeholder="poetry, art")
            with gr.Row():
                gr.Markdown("   ")
                get_sent_btn = gr.Button(value="Get Sentences", variant="primary", visible=True)
                gr.Markdown("   ")

    # Page 2
    with gr.Column(visible=False) as tab2:
        info_sentences_found = gr.Markdown(value="", visible=False)

        gr.Markdown("### Tested Social Bias Specification", visible=True)
        with gr.Row():
            group1_fixed = gr.Textbox(label="Social Group 1", max_lines=1, elem_id="group1_words", elem_classes="input_words", interactive=False, visible=True)
            group2_fixed = gr.Textbox(label='Social Group 2', max_lines=1, elem_id="group2_words", elem_classes="input_words", interactive=False, visible=True)
        with gr.Row():
            att1_fixed = gr.Textbox(label='Stereotype for Group 1', max_lines=1, elem_id="att1_words", elem_classes="input_words", interactive=False, visible=True)
            att2_fixed = gr.Textbox(label='Anti-stereotype for Group 1', max_lines=1, elem_id="att2_words", elem_classes="input_words", interactive=False, visible=True)

        with gr.Row():
            with gr.Column():
                #use_online_gen = gr.Checkbox(label="Generate new sentences with ChatGPT (requires OpenAI Key)",
                #                             value=False,
                #                             elem_id="gen_model_check")

                with gr.Row(visible=False) as online_gen_row:
                    # OpenAI Key for generator
                    openai_key = gr.Textbox(lines=1, label="OpenAI API Key",
                                            placeholder="starts with sk-",
                                            info="Please provide the key for an OpenAI account to generate new test sentences",
                                            visible=True,
                                            interactive=True,
                                            elem_id="open_ai_key_box")
                    num_sentences2gen = gr.Slider(8, 20, value=8, step=2,
                                                  interactive=True,
                                                  visible=True,
                                                  info="Default value is recommended based on the number of attribute and group terms you entered.",
                                                  label="Number of test sentences to generate").style(container=True) #, info="Number of Sentences to Generate")

                # Tested Model Selection - "emilyalsentzer/Bio_ClinicalBERT","microsoft/biogpt"
                tested_model_name = gr.Dropdown(["bert-base-uncased", "bert-large-uncased", "gpt2", "gpt2-medium", "gpt2-large", "emilyalsentzer/Bio_ClinicalBERT", "microsoft/biogpt"], value="bert-base-uncased",
                                                multiselect=None,
                                                interactive=True,
                                                label="Tested Language Model",
                                                elem_id="tested_model_drop",
                                                visible=True
                                                #info="Select the language model to test for social bias."
                                                )

                with gr.Row():
                    gr.Markdown("   ")
                    gen_btn = gr.Button(value="Generate New Sentences", variant="primary", visible=True)
                    bias_btn = gr.Button(value="Test Model for Social Bias", variant="primary", visible=False)
                    gr.Markdown("   ")

        with gr.Row(visible=False) as row_sentences:
            with gr.Accordion(label="Test Sentences", open=False, visible=False) as acc_test_sentences:
                test_sentences = gr.DataFrame(
                    headers=["Test sentence", "Group term", "Attribute term"],
                    datatype=["str", "str", "str"],
                    row_count=(1, 'dynamic'),
                    col_count=(3, 'fixed'),
                    interactive=True,
                    visible=True,
                    #label="Generated Test Sentences",
                    max_rows=2,
                    overflow_row_behaviour="paginate")

    # Page 3
    with gr.Column(visible=False) as tab3:
        gr.Markdown("### Tested Social Bias Specification")
        with gr.Row():
            group1_fixed2 = gr.Textbox(label="Social Group 1", max_lines=1, elem_id="group1_words", elem_classes="input_words", interactive=False)
            group2_fixed2 = gr.Textbox(label='Social Group 2', max_lines=1, elem_id="group2_words", elem_classes="input_words", interactive=False)
        with gr.Row():
            att1_fixed2 = gr.Textbox(label='Stereotype for Group 1', max_lines=1, elem_id="att1_words", elem_classes="input_words", interactive=False)
            att2_fixed2 = gr.Textbox(label='Anti-stereotype for Group 1', max_lines=1, elem_id="att2_words", elem_classes="input_words", interactive=False)

        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("### Bias Test Results")
        with gr.Row():
            with gr.Column(scale=2):
                lbl_model_bias = gr.Markdown("**Model Bias** - % stereotyped choices (↑ more bias)")
                # model_bias_html = gr.HTML()
                model_bias_label = gr.Label(num_top_classes=1, label="% stereotyped choices (↑ more bias)",
                                            elem_id="res_label",
                                            show_label=False)
                lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (↑ more bias)")
                gr.Markdown("**Legend**")
                gr.HTML("<div style='height:20px;width:20px;background-color:#555;display:inline-block;vertical-align:top'></div><div style='display:inline-block;vertical-align:top'> : Group 1 attribute</div><br><div style='height:20px;width:20px;background-color:#999;display:inline-block;vertical-align:top'></div><div style='display:inline-block;vertical-align:top'> : Group 2 attribute</div>")
                # attribute_bias_labels = gr.Label(num_top_classes=8, label="Per attribute: % stereotyped choices (↑ more bias)",
                #                                  elem_id="per_attrib_label_elem",
                #                                  container=True,
                #                                  min_width=900,
                #                                  show_label=False)

                with gr.Row():
                    with gr.Column(variant="compact"):
                        gr.Markdown("#### Attribute Group 1")
                        attribute_bias_html_stereo = gr.HTML()
                    with gr.Column(variant="compact"):
                        gr.Markdown("#### Attribute Group 2")
                        attribute_bias_html_antistereo = gr.HTML()

                # with gr.Row():
                #     with gr.Column(scale=1):
                #         gr.Markdown("### Interpretation")

                with gr.Row():
                    # with gr.Accordion("Interpretation", open=False, visible=True):
                    with gr.Column():
                        gr.Markdown("**Interpretation**")
                        interpretation_msg = gr.HTML(value="Interpretation: Stereotype Score metric details in <a href='https://arxiv.org/abs/2004.09456'>Nadeem'20</a>", visible=False)

                save_msg = gr.HTML(value="<span style=\"color:black\">Bias test result saved! </span>",
                                   visible=False)

                with gr.Row():
                    with gr.Accordion("Per Sentence Bias Results", open=False, visible=True):
                        test_pairs = gr.DataFrame(
                            headers=["group_term", "template", "att_term_1", "att_term_2", "label_1", "label_2"],
                            datatype=["str", "str", "str", "str", "str", "str"],
                            row_count=(1, 'dynamic'),
                            #label="Bias Test Results Per Test Sentence Template",
                            max_rows=2,
                            overflow_row_behaviour="paginate"
                        )
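
    # Event wiring: connect the dropdown and buttons defined above to the handlers;
    # each handler returns updates for the listed output components, in order.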
    # select from predefined bias specifications
    example_biases.select(fn=prefillBiasSpec,
                          inputs=None,
                          outputs=[group1, group2, att1, att2])

    # Get sentences
    get_sent_btn.click(fn=retrieveSentences,
                       inputs=[group1, group2, att1, att2],
                       outputs=[err_message, online_gen_row, tested_model_name, info_sentences_found, bar_progress,
                                s1_btn, s2_btn, s3_btn, tab1, tab2, acc_test_sentences, row_sentences, test_sentences,
                                gen_btn, bias_btn, group1_fixed, group2_fixed, att1_fixed, att2_fixed])

    # request generation of new sentences
    gen_btn.click(fn=generateSentences,
                  inputs=[group1, group2, att1, att2, openai_key, num_sentences2gen],
                  outputs=[err_message, info_sentences_found, online_gen_row, #num_sentences2gen,
                           tested_model_name, acc_test_sentences, row_sentences, test_sentences, gen_btn, bias_btn])

    # Test bias
    bias_btn.click(fn=startBiasTest,
                   inputs=[test_sentences, group1, group2, att1, att2, tested_model_name],
                   outputs=[bar_progress, s1_btn, s2_btn, s3_btn, tab1, tab2, tab3, model_bias_label,
                            attribute_bias_html_stereo, attribute_bias_html_antistereo,
                            test_pairs, interpretation_msg, group1_fixed2, group2_fixed2, att1_fixed2, att2_fixed2]
                   )

    # top breadcrumbs
    s1_btn.click(fn=moveStep1,
                 inputs=[],
                 outputs=[s1_btn, s2_btn, s3_btn, tab1, tab2, tab3])

    # top breadcrumbs
    s2_btn.click(fn=moveStep2,
                 inputs=[],
                 outputs=[s1_btn, s2_btn, s3_btn, tab1, tab2, tab3])

    # top breadcrumbs
    s3_btn.click(fn=moveStep3,
                 inputs=[],
                 outputs=[s1_btn, s2_btn, s3_btn, tab1, tab2, tab3])

    # Additional Interactions
    # attribute_bias_labels.select(fn=selectAttributeLabel,
    #                              inputs=[],
    #                              outputs=[])

    # tick checkbox to use online generation
    #use_online_gen.change(fn=useOnlineGen,
    #                      inputs=[use_online_gen],
    #                      outputs=[openai_key, gen_btn, num_sentences])

iface.queue(concurrency_count=2).launch()