Files changed:
- Images/Forgetting1.png +0 -0
- Images/Forgetting2.png +0 -0
- Images/SLD1.png +0 -0
- Images/SLD2.png +0 -0
- Images/TANGO1.png +0 -0
- Images/TANGO2.png +0 -0
- Images/WEAT1.png +0 -0
- Images/WEAT2.png +0 -0
- __pycache__/css.cpython-312.pyc +0 -0
- app.py +119 -26
- configs/crowspairs.yaml +1 -1
- configs/homoglyphbias.yaml +0 -16
- configs/honest.yaml +1 -1
- configs/ieat.yaml +1 -1
- configs/imagedataleak.yaml +1 -1
- configs/measuringforgetting.yaml +19 -0
- configs/notmyvoice.yaml +1 -1
- configs/palms.yaml +14 -0
- configs/safelatentdiff.yaml +17 -0
- configs/stablebias.yaml +1 -1
- configs/stereoset.yaml +0 -16
- configs/tango.yaml +19 -0
- configs/videodiversemisinfo.yaml +1 -1
- configs/weat.yaml +1 -1
Images/Forgetting1.png
ADDED
Images/Forgetting2.png
ADDED
Images/SLD1.png
ADDED
Images/SLD2.png
ADDED
Images/TANGO1.png
ADDED
Images/TANGO2.png
ADDED
Images/WEAT1.png
CHANGED
Images/WEAT2.png
CHANGED
__pycache__/css.cpython-312.pyc
CHANGED
Binary files a/__pycache__/css.cpython-312.pyc and b/__pycache__/css.cpython-312.pyc differ
app.py
CHANGED
@@ -4,13 +4,31 @@ import pandas as pd
 from gradio_modal import Modal
 import os
 import yaml
-
+import itertools

 folder_path = 'configs'
 # List to store data from YAML files
 data_list = []
 metadata_dict = {}

+
+def expand_string_list(string_list):
+    expanded_list = []
+
+    # Add individual strings to the expanded list
+    expanded_list.extend(string_list)
+
+    # Generate combinations of different lengths from the input list
+    for r in range(2, len(string_list) + 1):
+        combinations = itertools.combinations(string_list, r)
+        for combination in combinations:
+            # Generate permutations of each combination
+            permutations = itertools.permutations(combination)
+            for permutation in permutations:
+                expanded_list.append(' + '.join(permutation))
+
+    return expanded_list
+
 # Iterate over each file in the folder
 for filename in os.listdir(folder_path):
     if filename.endswith('.yaml'):
@@ -27,25 +45,24 @@ globaldf['Link'] = '<u>'+globaldf['Link']+'</u>'

 # Define the desired order of categories
 modality_order = ["Text", "Image", "Audio", "Video"]
-
+level_order = ["Model", "Dataset", "Output", "Taxonomy"]

-
+modality_order = expand_string_list(modality_order)
+level_order = expand_string_list(level_order)
+
+# Convert Modality and Level columns to categorical with specified order
 globaldf['Modality'] = pd.Categorical(globaldf['Modality'], categories=modality_order, ordered=True)
-globaldf['
+globaldf['Level'] = pd.Categorical(globaldf['Level'], categories=level_order, ordered=True)

-# Sort DataFrame by Modality and
-globaldf.sort_values(by=['Modality', '
+# Sort DataFrame by Modality and Level
+globaldf.sort_values(by=['Modality', 'Level'], inplace=True)

 # create a gradio page with tabs and accordions

 # Path: taxonomy.py

-def
-    filteredtable =
-    return filteredtable
-
-def filter_type(filteredtable, modality_filter):
-    filteredtable = filteredtable[filteredtable['Type'].isin(modality_filter)]
+def filter_modality_level(fulltable, modality_filter, level_filter):
+    filteredtable = fulltable[fulltable['Modality'].str.contains('|'.join(modality_filter)) & fulltable['Level'].str.contains('|'.join(level_filter))]
     return filteredtable

 def showmodal(evt: gr.SelectData):
@@ -55,6 +72,7 @@ def showmodal(evt: gr.SelectData):
     authormd = gr.Markdown("",visible=False)
     tagsmd = gr.Markdown("",visible=False)
     abstractmd = gr.Markdown("",visible=False)
+    modelsmd = gr.Markdown("",visible=False)
     datasetmd = gr.Markdown("",visible=False)
     gallery = gr.Gallery([],visible=False)
     if evt.index[1] == 5:
@@ -67,6 +85,12 @@ def showmodal(evt: gr.SelectData):
         tagstr = ''.join(['<span class="tag">#'+tag+'</span> ' for tag in tags])
         tagsmd = gr.Markdown(tagstr, visible=True)

+        models = itemdic['Applicable Models']
+        if isinstance(models, list):
+            if len(models) > 0:
+                modelstr = '### Applicable Models: '+''.join(['<span class="tag">'+model+'</span> ' for model in models])
+                modelsmd = gr.Markdown(modelstr, visible=True)
+
         titlemd = gr.Markdown('# ['+itemdic['Link']+']('+itemdic['URL']+')',visible=True)

         if pd.notnull(itemdic['Authors']):
@@ -83,7 +107,7 @@ def showmodal(evt: gr.SelectData):
         if len(screenshots) > 0:
             gallery = gr.Gallery(screenshots, visible=True)

-    return [modal, titlemd, authormd, tagsmd, abstractmd, datasetmd, gallery]
+    return [modal, titlemd, authormd, tagsmd, abstractmd, modelsmd, datasetmd, gallery]

 with gr.Blocks(title = "Social Impact Measurement V2", css=custom_css, theme=gr.themes.Base()) as demo: #theme=gr.themes.Soft(),
     # create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
@@ -96,18 +120,18 @@ with gr.Blocks(title = "Social Impact Measurement V2", css=custom_css, theme=gr.
     gr.Markdown("""
 #### Technical Base System Evaluations:

-Below we list the aspects possible to evaluate in a generative system. Context-absent evaluations only provide narrow insights into the described aspects of the
+Below we list the aspects possible to evaluate in a generative system. Context-absent evaluations only provide narrow insights into the described aspects of the level of generative AI system. The depth of literature and research on evaluations differs by modality, with some modalities having sparse or no relevant literature, but the themes for evaluations can be applied to most systems.

 The following categories are high-level, non-exhaustive, and present a synthesis of the findings across different modalities. They refer solely to what can be evaluated in a base technical system:

 """)
     with gr.Tabs(elem_classes="tab-buttons") as tabs1:
-        with gr.TabItem("Bias/
+        with gr.TabItem("Bias/Stereotypes"):
             fulltable = globaldf[globaldf['Group'] == 'BiasEvals']
-            fulltable = fulltable[['Modality','
+            fulltable = fulltable[['Modality','Level', 'Suggested Evaluation', 'What it is evaluating', 'Considerations', 'Link']]

             gr.Markdown("""
-Generative AI systems can perpetuate harmful biases from various sources, including systemic, human, and statistical biases. These biases, also known as "fairness" considerations, can manifest in the final system due to choices made throughout the development process. They include harmful associations and
+Generative AI systems can perpetuate harmful biases from various sources, including systemic, human, and statistical biases. These biases, also known as "fairness" considerations, can manifest in the final system due to choices made throughout the development process. They include harmful associations and stereotypes related to protected classes, such as race, gender, and sexuality. Evaluating biases involves assessing correlations, co-occurrences, sentiment, and toxicity across different modalities, both within the model itself and in the outputs of downstream tasks.
 """)
             with gr.Row():
                 modality_filter = gr.CheckboxGroup(["Text", "Image", "Audio", "Video"],
@@ -116,17 +140,17 @@ The following categories are high-level, non-exhaustive, and present a synthesis
                     show_label=True,
                     # info="Which modality to show."
                 )
-
+                level_filter = gr.CheckboxGroup(["Model", "Dataset", "Output", "Taxonomy"],
                     value=["Model", "Dataset", "Output", "Taxonomy"],
-                    label="
+                    label="Level",
                     show_label=True,
                     # info="Which modality to show."
                 )
             with gr.Row():
-
-
-                modality_filter.change(
-
+                table_full = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=False, interactive=False)
+                table_filtered = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=True, interactive=False)
+                modality_filter.change(filter_modality_level, inputs=[table_full, modality_filter, level_filter], outputs=table_filtered)
+                level_filter.change(filter_modality_level, inputs=[table_full, modality_filter, level_filter], outputs=table_filtered)


             with Modal(visible=False) as modal:
@@ -138,25 +162,94 @@ The following categories are high-level, non-exhaustive, and present a synthesis
                 gr.Markdown("### What it is evaluating", visible=True)
                 gr.Markdown('## Resources', visible=True)
                 gr.Markdown('### What you need to do this evaluation', visible=True)
+                modelsmd = gr.Markdown(visible=False)
                 datasetmd = gr.Markdown(visible=False)
                 gr.Markdown("## Results", visible=True)
                 gr.Markdown("### Metrics", visible=True)
                 gallery = gr.Gallery(visible=False)
-
+            table_filtered.select(showmodal, None, [modal, titlemd, authormd, tagsmd, abstractmd, modelsmd, datasetmd, gallery])



         with gr.TabItem("Cultural Values/Sensitive Content"):
+            fulltable = globaldf[globaldf['Group'] == 'CulturalEvals']
+            fulltable = fulltable[['Modality','Level', 'Suggested Evaluation', 'What it is evaluating', 'Considerations', 'Link']]
+
+            gr.Markdown("""Cultural values are specific to groups and sensitive content is normative. Sensitive topics also vary by culture and can include hate speech. What is considered a sensitive topic, such as egregious violence or adult sexual content, can vary widely by viewpoint. Due to norms differing by culture, region, and language, there is no standard for what constitutes sensitive content.
+Distinct cultural values present a challenge for deploying models into a global sphere, as what may be appropriate in one culture may be unsafe in others. Generative AI systems cannot be neutral or objective, nor can they encompass truly universal values. There is no “view from nowhere”; in quantifying anything, a particular frame of reference is imposed.
+""")
             with gr.Row():
-                gr.Image()
+                modality_filter = gr.CheckboxGroup(["Text", "Image", "Audio", "Video"],
+                    value=["Text", "Image", "Audio", "Video"],
+                    label="Modality",
+                    show_label=True,
+                    # info="Which modality to show."
+                )
+                level_filter = gr.CheckboxGroup(["Model", "Dataset", "Output", "Taxonomy"],
+                    value=["Model", "Dataset", "Output", "Taxonomy"],
+                    label="Level",
+                    show_label=True,
+                    # info="Which modality to show."
+                )
+            with gr.Row():
+                table_full = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=False, interactive=False)
+                table_filtered = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=True, interactive=False)
+                modality_filter.change(filter_modality_level, inputs=[table_full, modality_filter, level_filter], outputs=table_filtered)
+                level_filter.change(filter_modality_level, inputs=[table_full, modality_filter, level_filter], outputs=table_filtered)
+
+
+            with Modal(visible=False) as modal:
+                titlemd = gr.Markdown(visible=False)
+                authormd = gr.Markdown(visible=False)
+                tagsmd = gr.Markdown(visible=False)
+                abstractmd = gr.Markdown(visible=False)
+                modelsmd = gr.Markdown(visible=False)
+                datasetmd = gr.Markdown(visible=False)
+                gallery = gr.Gallery(visible=False)
+            table_filtered.select(showmodal, None, [modal, titlemd, authormd, tagsmd, abstractmd, modelsmd, datasetmd, gallery])
+
+

         # with gr.TabItem("Disparate Performance"):
         #     with gr.Row():
         #         gr.Image()

         with gr.TabItem("Privacy/Data Protection"):
+            fulltable = globaldf[globaldf['Group'] == 'PrivacyEvals']
+            fulltable = fulltable[['Modality','Level', 'Suggested Evaluation', 'What it is evaluating', 'Considerations', 'Link']]
+
+            gr.Markdown("""Cultural values are specific to groups and sensitive content is normative. Sensitive topics also vary by culture and can include hate speech. What is considered a sensitive topic, such as egregious violence or adult sexual content, can vary widely by viewpoint. Due to norms differing by culture, region, and language, there is no standard for what constitutes sensitive content.
+Distinct cultural values present a challenge for deploying models into a global sphere, as what may be appropriate in one culture may be unsafe in others. Generative AI systems cannot be neutral or objective, nor can they encompass truly universal values. There is no “view from nowhere”; in quantifying anything, a particular frame of reference is imposed.
+""")
             with gr.Row():
-                gr.Image()
+                modality_filter = gr.CheckboxGroup(["Text", "Image", "Audio", "Video"],
+                    value=["Text", "Image", "Audio", "Video"],
+                    label="Modality",
+                    show_label=True,
+                    # info="Which modality to show."
+                )
+                level_filter = gr.CheckboxGroup(["Model", "Dataset", "Output", "Taxonomy"],
+                    value=["Model", "Dataset", "Output", "Taxonomy"],
+                    label="Level",
+                    show_label=True,
+                    # info="Which modality to show."
+                )
+            with gr.Row():
+                table_full = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=False, interactive=False)
+                table_filtered = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=True, interactive=False)
+                modality_filter.change(filter_modality_level, inputs=[table_full, modality_filter, level_filter], outputs=table_filtered)
+                level_filter.change(filter_modality_level, inputs=[table_full, modality_filter, level_filter], outputs=table_filtered)
+
+
+            with Modal(visible=False) as modal:
+                titlemd = gr.Markdown(visible=False)
+                authormd = gr.Markdown(visible=False)
+                tagsmd = gr.Markdown(visible=False)
+                abstractmd = gr.Markdown(visible=False)
+                modelsmd = gr.Markdown(visible=False)
+                datasetmd = gr.Markdown(visible=False)
+                gallery = gr.Gallery(visible=False)
+            table_filtered.select(showmodal, None, [modal, titlemd, authormd, tagsmd, abstractmd, modelsmd, datasetmd, gallery])

         # with gr.TabItem("Financial Costs"):
         #     with gr.Row():
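
To make the sorting change concrete: the new expand_string_list helper generates every ordered combination of its inputs, so combined config values like 'Text + Image + Audio' become valid ordered categories. A minimal standalone sketch (function body copied from the hunk above; sample inputs are illustrative):

import itertools

def expand_string_list(string_list):
    # The original strings, plus every ordered combination of length 2..n
    # joined with ' + ', the format multi-modality configs use.
    expanded_list = list(string_list)
    for r in range(2, len(string_list) + 1):
        for combination in itertools.combinations(string_list, r):
            for permutation in itertools.permutations(combination):
                expanded_list.append(' + '.join(permutation))
    return expanded_list

print(expand_string_list(["Text", "Image"]))
# ['Text', 'Image', 'Text + Image', 'Image + Text']

# For the four modalities: 4 singles + 12 pairs + 24 triples + 24 quadruples = 64.
print(len(expand_string_list(["Text", "Image", "Audio", "Video"])))  # 64

Enumerating every permutation matters because pd.Categorical maps values missing from its categories to NaN, which would silently drop combined-modality rows from the sort order.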
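The replacement filter matches substrings rather than exact values, which is how a combined row like 'Text + Image + Audio' survives a single-modality selection. A quick sketch on a toy frame (the data here is illustrative):

import pandas as pd

df = pd.DataFrame({
    'Modality': ['Text', 'Image', 'Text + Image + Audio'],
    'Level': ['Output', 'Model', 'Model'],
})

def filter_modality_level(fulltable, modality_filter, level_filter):
    # Same logic as the new app.py helper: '|'.join builds a regex
    # alternation, so any selected modality appearing anywhere in the
    # cell counts as a match.
    return fulltable[fulltable['Modality'].str.contains('|'.join(modality_filter))
                     & fulltable['Level'].str.contains('|'.join(level_filter))]

print(filter_modality_level(df, ['Audio'], ['Model']))
# Keeps only the 'Text + Image + Audio' / 'Model' row.

One caveat: with no boxes ticked, '|'.join([]) is the empty string, and str.contains('') matches every row, so clearing every checkbox shows the full table rather than an empty one.
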
configs/crowspairs.yaml
CHANGED
@@ -14,6 +14,6 @@ Screenshots:
 - Images/CrowsPairs1.png
 - Images/CrowsPairs2.png
 Suggested Evaluation: CrowS-Pairs
-Type: Dataset
+Level: Dataset
 URL: https://arxiv.org/abs/2010.00133
 What it is evaluating: Protected class stereotypes

configs/homoglyphbias.yaml
DELETED
@@ -1,16 +0,0 @@
-Abstract: .nan
-Applicable Models: .nan
-Authors: .nan
-Considerations: .nan
-Datasets: .nan
-Group: BiasEvals
-Hashtags: .nan
-Link: Exploiting Cultural Biases via Homoglyphs in Text-to-Image Synthesis
-Modality: Image
-Screenshots: []
-Suggested Evaluation: Effect of different scripts on text-to-image generation
-Type: Output
-URL: https://arxiv.org/pdf/2209.08891.pdf
-What it is evaluating: It evaluates generated images for cultural stereotypes, when
-  using different scripts (homoglyphs). It somewhat measures the susceptibility of
-  a model to produce cultural stereotypes by simply switching the script

configs/honest.yaml
CHANGED
@@ -11,6 +11,6 @@ Link: 'HONEST: Measuring Hurtful Sentence Completion in Language Models'
 Modality: Text
 Screenshots: []
 Suggested Evaluation: 'HONEST: Measuring Hurtful Sentence Completion in Language Models'
-Type: Output
+Level: Output
 URL: https://aclanthology.org/2021.naacl-main.191.pdf
 What it is evaluating: Protected class stereotypes and hurtful language

configs/ieat.yaml
CHANGED
@@ -12,6 +12,6 @@ Link: Image Representations Learned With Unsupervised Pre-Training Contain Human
 Modality: Image
 Screenshots: []
 Suggested Evaluation: Image Embedding Association Test (iEAT)
-Type: Model
+Level: Model
 URL: https://dl.acm.org/doi/abs/10.1145/3442188.3445932
 What it is evaluating: Embedding associations

configs/imagedataleak.yaml
CHANGED
@@ -10,6 +10,6 @@ Link: 'Balanced Datasets Are Not Enough: Estimating and Mitigating Gender Bias i
 Modality: Image
 Screenshots: []
 Suggested Evaluation: Dataset leakage and model leakage
-Type: Dataset
+Level: Dataset
 URL: https://arxiv.org/abs/1811.08489
 What it is evaluating: Gender and label bias

configs/measuringforgetting.yaml
ADDED
@@ -0,0 +1,19 @@
+Abstract: "Machine learning models exhibit two seemingly contradictory phenomena: training data memorization, and various forms of forgetting. In memorization, models overfit specific training examples and become susceptible to privacy attacks. In forgetting, examples which appeared early in training are forgotten by the end. In this work, we connect these phenomena. We propose a technique to measure to what extent models \"forget\" the specifics of training examples, becoming less susceptible to privacy attacks on examples they have not seen recently. We show that, while non-convex models can memorize data forever in the worst-case, standard image, speech, and language models empirically do forget examples over time. We identify nondeterminism as a potential explanation, showing that deterministically trained models do not forget. Our results suggest that examples seen early when training with extremely large datasets - for instance those examples used to pre-train a model - may observe privacy benefits at the expense of examples seen later."
+Applicable Models:
+- ResNet (Image)
+- Conformer (Audio)
+- T5 (Text)
+Authors: Matthew Jagielski, Om Thakkar, Florian Tramèr, Daphne Ippolito, Katherine Lee, Nicholas Carlini, Eric Wallace, Shuang Song, Abhradeep Thakurta, Nicolas Papernot, Chiyuan Zhang
+Considerations: .nan
+Datasets: .nan
+Group: PrivacyEvals
+Hashtags: .nan
+Link: 'Measuring Forgetting of Memorized Training Examples'
+Modality: Text + Image + Audio
+Screenshots:
+- Images/Forgetting1.png
+- Images/Forgetting2.png
+Suggested Evaluation: Measuring forgetting of training examples
+Level: Model
+URL: https://arxiv.org/pdf/2207.00099.pdf
+What it is evaluating: Measure whether models forget training examples over time, over different types of models (image, audio, text) and how order of training affects privacy attacks

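All of the added configs share one flat schema, with Level replacing the old Type key. A minimal sketch of how files like this get picked up, assuming PyYAML as in app.py; the consistency assert is illustrative, not part of the app:

import os
import yaml

data_list = []
for filename in os.listdir('configs'):
    if filename.endswith('.yaml'):
        with open(os.path.join('configs', filename)) as f:
            entry = yaml.safe_load(f)
        # Each file is one flat mapping; after this commit every entry
        # should carry Level and no longer Type.
        assert 'Level' in entry and 'Type' not in entry, filename
        data_list.append(entry)
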
configs/notmyvoice.yaml
CHANGED
@@ -11,6 +11,6 @@ Modality: Audio
 Screenshots: []
 Suggested Evaluation: Not My Voice! A Taxonomy of Ethical and Safety Harms of Speech
   Generators
-Type: Taxonomy
+Level: Taxonomy
 URL: https://arxiv.org/pdf/2402.01708.pdf
 What it is evaluating: Lists harms of audio/speech generators

configs/palms.yaml
ADDED
@@ -0,0 +1,14 @@
+Abstract: "Language models can generate harmful and biased outputs and exhibit undesirable behavior according to a given cultural context. We propose a Process for Adapting Language Models to Society (PALMS) with Values-Targeted Datasets, an iterative process to significantly change model behavior by crafting and fine-tuning on a dataset that reflects a predetermined set of target values. We evaluate our process using three metrics: quantitative metrics with human evaluations that score output adherence to a target value, toxicity scoring on outputs; and qualitative metrics analyzing the most common word associated with a given social category. Through each iteration, we add additional training dataset examples based on observed shortcomings from evaluations. PALMS performs significantly better on all metrics compared to baseline and control models for a broad range of GPT-3 language model sizes without compromising capability integrity. We find that the effectiveness of PALMS increases with model size. We show that significantly adjusting language model behavior is feasible with a small, hand-curated dataset."
+Applicable Models: .nan
+Authors: Irene Solaiman, Christy Dennison
+Considerations: Requires predefining what adherence to a culture means for human evals
+Datasets: .nan
+Group: CulturalEvals
+Hashtags: .nan
+Link: 'Process for Adapting Language Models to Society (PALMS) with Values-Targeted Datasets'
+Modality: Text
+Screenshots: .nan
+Suggested Evaluation: Human and Toxicity Evals of Cultural Value Categories
+Level: Output
+URL: http://arxiv.org/abs/2106.10328
+What it is evaluating: Adherence to defined norms for a set of cultural categories

configs/safelatentdiff.yaml
ADDED
@@ -0,0 +1,17 @@
+Abstract: "Text-conditioned image generation models have recently achieved astonishing results in image quality and text alignment and are consequently employed in a fast-growing number of applications. Since they are highly data-driven, relying on billion-sized datasets randomly scraped from the internet, they also suffer, as we demonstrate, from degenerated and biased human behavior. In turn, they may even reinforce such biases. To help combat these undesired side effects, we present safe latent diffusion (SLD). Specifically, to measure the inappropriate degeneration due to unfiltered and imbalanced training sets, we establish a novel image generation test bed-inappropriate image prompts (I2P)-containing dedicated, real-world image-to-text prompts covering concepts such as nudity and violence. As our exhaustive empirical evaluation demonstrates, the introduced SLD removes and suppresses inappropriate image parts during the diffusion process, with no additional training required and no adverse effect on overall image quality or text alignment."
+Applicable Models:
+- Stable Diffusion
+Authors: Patrick Schramowski, Manuel Brack, Björn Deiseroth, Kristian Kersting
+Considerations: What is considered appropriate and inappropriate varies strongly across cultures and is very context dependent
+Datasets: https://huggingface.co/datasets/AIML-TUDA/i2p
+Group: CulturalEvals
+Hashtags: .nan
+Link: 'Safe Latent Diffusion: Mitigating Inappropriate Degeneration in Diffusion Models'
+Modality: Image
+Screenshots:
+- Images/SLD1.png
+- Images/SLD2.png
+Suggested Evaluation: Evaluating text-to-image models for safety
+Level: Output
+URL: https://arxiv.org/pdf/2211.05105.pdf
+What it is evaluating: Generating images for diverse set of prompts (novel I2P benchmark) and investigating how often e.g. violent/nude images will be generated. There is a distinction between implicit and explicit safety, i.e. unsafe results with “normal” prompts.

configs/stablebias.yaml
CHANGED
@@ -9,6 +9,6 @@ Link: 'Stable bias: Analyzing societal representations in diffusion models'
 Modality: Image
 Screenshots: []
 Suggested Evaluation: Characterizing the variation in generated images
-Type: Output
+Level: Output
 URL: https://arxiv.org/abs/2303.11408
 What it is evaluating: .nan

configs/stereoset.yaml
DELETED
@@ -1,16 +0,0 @@
-Abstract: .nan
-Applicable Models: .nan
-Authors: .nan
-Considerations: Automating stereotype detection makes distinguishing harmful stereotypes
-  difficult. It also raises many false positives and can flag relatively neutral associations
-  based in fact (e.g. population x has a high proportion of lactose intolerant people).
-Datasets: .nan
-Group: BiasEvals
-Hashtags: .nan
-Link: 'StereoSet: Measuring stereotypical bias in pretrained language models'
-Modality: Text
-Screenshots: []
-Suggested Evaluation: StereoSet
-Type: Dataset
-URL: https://arxiv.org/abs/2004.09456
-What it is evaluating: Protected class stereotypes

configs/tango.yaml
ADDED
@@ -0,0 +1,19 @@
+Abstract: "Transgender and non-binary (TGNB) individuals disproportionately experience discrimination and exclusion from daily life. Given the recent popularity and adoption of language generation technologies, the potential to further marginalize this population only grows. Although a multitude of NLP fairness literature focuses on illuminating and addressing gender biases, assessing gender harms for TGNB identities requires understanding how such identities uniquely interact with societal gender norms and how they differ from gender binary-centric perspectives. Such measurement frameworks inherently require centering TGNB voices to help guide the alignment between gender-inclusive NLP and whom they are intended to serve. Towards this goal, we ground our work in the TGNB community and existing interdisciplinary literature to assess how the social reality surrounding experienced marginalization of TGNB persons contributes to and persists within Open Language Generation (OLG). This social knowledge serves as a guide for evaluating popular large language models (LLMs) on two key aspects: (1) misgendering and (2) harmful responses to gender disclosure. To do this, we introduce TANGO, a dataset of template-based real-world text curated from a TGNB-oriented community. We discover a dominance of binary gender norms reflected by the models; LLMs least misgendered subjects in generated text when triggered by prompts whose subjects used binary pronouns. Meanwhile, misgendering was most prevalent when triggering generation with singular they and neopronouns. When prompted with gender disclosures, TGNB disclosure generated the most stigmatizing language and scored most toxic, on average. Our findings warrant further research on how TGNB harms manifest in LLMs and serve as a broader case study toward concretely grounding the design of gender-inclusive AI in community voices and interdisciplinary literature."
+Applicable Models:
+- GPT-2
+- GPT-Neo
+- OPT
+Authors: Anaelia Ovalle, Palash Goyal, Jwala Dhamala, Zachary Jaggers, Kai-Wei Chang, Aram Galstyan, Richard Zemel, Rahul Gupta
+Considerations: Based on automatic evaluations of the resulting open language generation, may be sensitive to the choice of evaluator. Would advise a combination of perspective, detoxify, and regard metrics
+Datasets: https://huggingface.co/datasets/AlexaAI/TANGO
+Group: CulturalEvals
+Hashtags: .nan
+Link: '“I’m fully who I am”: Towards Centering Transgender and Non-Binary Voices to Measure Biases in Open Language Generation'
+Modality: Text
+Screenshots:
+- Images/TANGO1.png
+- Images/TANGO2.png
+Suggested Evaluation: Human and Toxicity Evals of Cultural Value Categories
+Level: Output
+URL: http://arxiv.org/abs/2106.10328
+What it is evaluating: Bias measurement for trans and nonbinary community via measuring gender non-affirmative language, specifically 1) misgendering, 2) negative responses to gender disclosure

configs/videodiversemisinfo.yaml
CHANGED
@@ -13,7 +13,7 @@ Modality: Video
 Screenshots: []
 Suggested Evaluation: 'Diverse Misinformation: Impacts of Human Biases on Detection
   of Deepfakes on Networks'
-Type: Output
+Level: Output
 URL: https://arxiv.org/abs/2210.10026
 What it is evaluating: Human led evaluations of deepfakes to understand susceptibility
   and representational harms (including political violence)

configs/weat.yaml
CHANGED
@@ -36,7 +36,7 @@ Screenshots:
 - Images/WEAT1.png
 - Images/WEAT2.png
 Suggested Evaluation: Word Embedding Association Test (WEAT)
-Type: Model
+Level: Model
 URL: https://researchportal.bath.ac.uk/en/publications/semantics-derived-automatically-from-language-corpora-necessarily
 What it is evaluating: Associations and word embeddings based on Implicit Associations
   Test (IAT)
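
For readers unfamiliar with the test this config points to, here is a compact sketch of the WEAT effect size from Caliskan et al. (2017), with random vectors standing in for real word embeddings; this is background, not code from the repository:

import numpy as np

def cosine(u, v):
    return u @ v / (np.linalg.norm(u) * np.linalg.norm(v))

def weat_effect_size(X, Y, A, B):
    # s(w, A, B): mean similarity of w to attribute set A minus attribute set B.
    def s(w):
        return np.mean([cosine(w, a) for a in A]) - np.mean([cosine(w, b) for b in B])
    sx = [s(x) for x in X]
    sy = [s(y) for y in Y]
    # Cohen's-d-style effect size over the pooled target sets X and Y.
    return (np.mean(sx) - np.mean(sy)) / np.std(sx + sy, ddof=1)

rng = np.random.default_rng(0)
X, Y, A, B = (rng.normal(size=(8, 50)) for _ in range(4))
print(weat_effect_size(X, Y, A, B))  # near 0 for unrelated random vectors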