Avijit Ghosh committed
Commit 61b63e9 · 2 parents: 6c70e1e 2ec74bb
Images/Forgetting1.png ADDED
Images/Forgetting2.png ADDED
Images/SLD1.png ADDED
Images/SLD2.png ADDED
Images/TANGO1.png ADDED
Images/TANGO2.png ADDED
Images/WEAT1.png CHANGED
Images/WEAT2.png CHANGED
__pycache__/css.cpython-312.pyc CHANGED
Binary files a/__pycache__/css.cpython-312.pyc and b/__pycache__/css.cpython-312.pyc differ
 
app.py CHANGED
@@ -4,13 +4,31 @@ import pandas as pd
4
  from gradio_modal import Modal
5
  import os
6
  import yaml
7
-
8
 
9
  folder_path = 'configs'
10
  # List to store data from YAML files
11
  data_list = []
12
  metadata_dict = {}
13
 
14
  # Iterate over each file in the folder
15
  for filename in os.listdir(folder_path):
16
  if filename.endswith('.yaml'):
@@ -27,25 +45,24 @@ globaldf['Link'] = '<u>'+globaldf['Link']+'</u>'
27
 
28
  # Define the desired order of categories
29
  modality_order = ["Text", "Image", "Audio", "Video"]
30
- type_order = ["Model", "Dataset", "Output", "Taxonomy"]
31
 
32
- # Convert Modality and Type columns to categorical with specified order
33
  globaldf['Modality'] = pd.Categorical(globaldf['Modality'], categories=modality_order, ordered=True)
34
- globaldf['Type'] = pd.Categorical(globaldf['Type'], categories=type_order, ordered=True)
35
 
36
- # Sort DataFrame by Modality and Type
37
- globaldf.sort_values(by=['Modality', 'Type'], inplace=True)
38
 
39
  # create a gradio page with tabs and accordions
40
 
41
  # Path: taxonomy.py
42
 
43
- def filter_modality(filteredtable, modality_filter):
44
- filteredtable = filteredtable[filteredtable['Modality'].isin(modality_filter)]
45
- return filteredtable
46
-
47
- def filter_type(filteredtable, modality_filter):
48
- filteredtable = filteredtable[filteredtable['Type'].isin(modality_filter)]
49
  return filteredtable
50
 
51
  def showmodal(evt: gr.SelectData):
@@ -55,6 +72,7 @@ def showmodal(evt: gr.SelectData):
55
  authormd = gr.Markdown("",visible=False)
56
  tagsmd = gr.Markdown("",visible=False)
57
  abstractmd = gr.Markdown("",visible=False)
58
  datasetmd = gr.Markdown("",visible=False)
59
  gallery = gr.Gallery([],visible=False)
60
  if evt.index[1] == 5:
@@ -67,6 +85,12 @@ def showmodal(evt: gr.SelectData):
67
  tagstr = ''.join(['<span class="tag">#'+tag+'</span> ' for tag in tags])
68
  tagsmd = gr.Markdown(tagstr, visible=True)
69
 
70
  titlemd = gr.Markdown('# ['+itemdic['Link']+']('+itemdic['URL']+')',visible=True)
71
 
72
  if pd.notnull(itemdic['Authors']):
@@ -83,7 +107,7 @@ def showmodal(evt: gr.SelectData):
83
  if len(screenshots) > 0:
84
  gallery = gr.Gallery(screenshots, visible=True)
85
 
86
- return [modal, titlemd, authormd, tagsmd, abstractmd, datasetmd, gallery]
87
 
88
  with gr.Blocks(title = "Social Impact Measurement V2", css=custom_css, theme=gr.themes.Base()) as demo: #theme=gr.themes.Soft(),
89
  # create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
@@ -96,18 +120,18 @@ with gr.Blocks(title = "Social Impact Measurement V2", css=custom_css, theme=gr.
96
  gr.Markdown("""
97
  #### Technical Base System Evaluations:
98
 
99
- Below we list the aspects possible to evaluate in a generative system. Context-absent evaluations only provide narrow insights into the described aspects of the type of generative AI system. The depth of literature and research on evaluations differ by modality with some modalities having sparse or no relevant literature, but the themes for evaluations can be applied to most systems.
100
 
101
  The following categories are high-level, non-exhaustive, and present a synthesis of the findings across different modalities. They refer solely to what can be evaluated in a base technical system:
102
 
103
  """)
104
  with gr.Tabs(elem_classes="tab-buttons") as tabs1:
105
- with gr.TabItem("Bias/Stereotypes"):
106
  fulltable = globaldf[globaldf['Group'] == 'BiasEvals']
107
- fulltable = fulltable[['Modality','Type', 'Suggested Evaluation', 'What it is evaluating', 'Considerations', 'Link']]
108
 
109
  gr.Markdown("""
110
- Generative AI systems can perpetuate harmful biases from various sources, including systemic, human, and statistical biases. These biases, also known as "fairness" considerations, can manifest in the final system due to choices made throughout the development process. They include harmful associations and stereotypes related to protected classes, such as race, gender, and sexuality. Evaluating biases involves assessing correlations, co-occurrences, sentiment, and toxicity across different modalities, both within the model itself and in the outputs of downstream tasks.
111
  """)
112
  with gr.Row():
113
  modality_filter = gr.CheckboxGroup(["Text", "Image", "Audio", "Video"],
@@ -116,17 +140,17 @@ The following categories are high-level, non-exhaustive, and present a synthesis
116
  show_label=True,
117
  # info="Which modality to show."
118
  )
119
- type_filter = gr.CheckboxGroup(["Model", "Dataset", "Output", "Taxonomy"],
120
  value=["Model", "Dataset", "Output", "Taxonomy"],
121
- label="Type",
122
  show_label=True,
123
  # info="Which modality to show."
124
  )
125
  with gr.Row():
126
- biastable_full = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=False, interactive=False)
127
- biastable_filtered = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=True, interactive=False)
128
- modality_filter.change(filter_modality, inputs=[biastable_filtered, modality_filter], outputs=biastable_filtered)
129
- type_filter.change(filter_type, inputs=[biastable_filtered, type_filter], outputs=biastable_filtered)
130
 
131
 
132
  with Modal(visible=False) as modal:
@@ -138,25 +162,94 @@ The following categories are high-level, non-exhaustive, and present a synthesis
138
  gr.Markdown("### What it is evaluating", visible=True)
139
  gr.Markdown('## Resources', visible=True)
140
  gr.Markdown('### What you need to do this evaluation', visible=True)
141
  datasetmd = gr.Markdown(visible=False)
142
  gr.Markdown("## Results", visible=True)
143
  gr.Markdown("### Metrics", visible=True)
144
  gallery = gr.Gallery(visible=False)
145
- biastable_filtered.select(showmodal, None, [modal, titlemd, authormd, tagsmd, abstractmd, datasetmd, gallery])
146
 
147
 
148
 
149
  with gr.TabItem("Cultural Values/Sensitive Content"):
150
  with gr.Row():
151
- gr.Image()
152
 
153
  # with gr.TabItem("Disparate Performance"):
154
  # with gr.Row():
155
  # gr.Image()
156
 
157
  with gr.TabItem("Privacy/Data Protection"):
158
  with gr.Row():
159
- gr.Image()
160
 
161
  # with gr.TabItem("Financial Costs"):
162
  # with gr.Row():
 
4
  from gradio_modal import Modal
5
  import os
6
  import yaml
7
+ import itertools
8
 
9
  folder_path = 'configs'
10
  # List to store data from YAML files
11
  data_list = []
12
  metadata_dict = {}
13
 
14
+
15
+ def expand_string_list(string_list):
16
+ expanded_list = []
17
+
18
+ # Add individual strings to the expanded list
19
+ expanded_list.extend(string_list)
20
+
21
+ # Generate combinations of different lengths from the input list
22
+ for r in range(2, len(string_list) + 1):
23
+ combinations = itertools.combinations(string_list, r)
24
+ for combination in combinations:
25
+ # Generate permutations of each combination
26
+ permutations = itertools.permutations(combination)
27
+ for permutation in permutations:
28
+ expanded_list.append(' + '.join(permutation))
29
+
30
+ return expanded_list
31
+
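For context on the new helper: a minimal standalone sketch, not part of the commit, showing what expand_string_list returns. The function body mirrors the one added above; the example inputs are the app's own category lists.

```python
import itertools

def expand_string_list(string_list):
    # Singles first, then every ' + '-joined ordered combination (as added above).
    expanded_list = list(string_list)
    for r in range(2, len(string_list) + 1):
        for combination in itertools.combinations(string_list, r):
            for permutation in itertools.permutations(combination):
                expanded_list.append(' + '.join(permutation))
    return expanded_list

print(expand_string_list(["Model", "Dataset"]))
# ['Model', 'Dataset', 'Model + Dataset', 'Dataset + Model']

# For the four modalities this yields 4 + 12 + 24 + 24 = 64 labels, so combined
# values such as "Text + Image + Audio" in the configs map to known categories.
print(len(expand_string_list(["Text", "Image", "Audio", "Video"])))  # 64
```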
32
  # Iterate over each file in the folder
33
  for filename in os.listdir(folder_path):
34
  if filename.endswith('.yaml'):
 
45
 
46
  # Define the desired order of categories
47
  modality_order = ["Text", "Image", "Audio", "Video"]
48
+ level_order = ["Model", "Dataset", "Output", "Taxonomy"]
49
 
50
+ modality_order = expand_string_list(modality_order)
51
+ level_order = expand_string_list(level_order)
52
+
53
+ # Convert Modality and Level columns to categorical with specified order
54
  globaldf['Modality'] = pd.Categorical(globaldf['Modality'], categories=modality_order, ordered=True)
55
+ globaldf['Level'] = pd.Categorical(globaldf['Level'], categories=level_order, ordered=True)
56
 
57
+ # Sort DataFrame by Modality and Level
58
+ globaldf.sort_values(by=['Modality', 'Level'], inplace=True)
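To illustrate why the expanded category lists matter for sorting: a small hypothetical sketch in which the DataFrame stands in for globaldf. The trimmed categories list is an assumption standing in for the full expand_string_list output; the point is that single modalities sort first and combined labels after them.

```python
import pandas as pd

# Trimmed stand-in for expand_string_list(["Text", "Image", "Audio", "Video"]):
# the four singles, then the ' + '-joined combinations.
categories = ["Text", "Image", "Audio", "Video",
              "Text + Image", "Image + Text", "Text + Image + Audio"]

df = pd.DataFrame({"Modality": ["Image", "Text + Image + Audio", "Text", "Audio"]})
df["Modality"] = pd.Categorical(df["Modality"], categories=categories, ordered=True)

print(df.sort_values("Modality")["Modality"].tolist())
# ['Text', 'Image', 'Audio', 'Text + Image + Audio']
```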
59
 
60
  # create a gradio page with tabs and accordions
61
 
62
  # Path: taxonomy.py
63
 
64
+ def filter_modality_level(fulltable, modality_filter, level_filter):
65
+ filteredtable = fulltable[fulltable['Modality'].str.contains('|'.join(modality_filter)) & fulltable['Level'].str.contains('|'.join(level_filter))]
66
  return filteredtable
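How the combined filter behaves: a minimal sketch with a hypothetical three-row table (the real table comes from the YAML configs). It uses the same str.contains logic as the function above, so multi-modality rows match whenever any of their parts is checked.

```python
import pandas as pd

table = pd.DataFrame({
    "Modality": ["Text", "Image", "Text + Image + Audio"],
    "Level": ["Dataset", "Model", "Model"],
    "Suggested Evaluation": ["CrowS-Pairs", "iEAT", "Measuring Forgetting"],
})

def filter_modality_level(fulltable, modality_filter, level_filter):
    # A row is kept if its Modality and Level strings each contain at least
    # one of the selected values (joined into an alternation pattern).
    return fulltable[
        fulltable["Modality"].str.contains("|".join(modality_filter))
        & fulltable["Level"].str.contains("|".join(level_filter))
    ]

print(filter_modality_level(table, ["Audio"], ["Model"])["Suggested Evaluation"].tolist())
# ['Measuring Forgetting']

# Edge case: with nothing checked, '|'.join([]) is '', which matches every row,
# so an empty selection shows the full table rather than an empty one.
```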
67
 
68
  def showmodal(evt: gr.SelectData):
 
72
  authormd = gr.Markdown("",visible=False)
73
  tagsmd = gr.Markdown("",visible=False)
74
  abstractmd = gr.Markdown("",visible=False)
75
+ modelsmd = gr.Markdown("",visible=False)
76
  datasetmd = gr.Markdown("",visible=False)
77
  gallery = gr.Gallery([],visible=False)
78
  if evt.index[1] == 5:
 
85
  tagstr = ''.join(['<span class="tag">#'+tag+'</span> ' for tag in tags])
86
  tagsmd = gr.Markdown(tagstr, visible=True)
87
 
88
+ models = itemdic['Applicable Models']
89
+ if isinstance(models, list):
90
+ if len(models) > 0:
91
+ modelstr = '### Applicable Models: '+''.join(['<span class="tag">'+model+'</span> ' for model in models])
92
+ modelsmd = gr.Markdown(modelstr, visible=True)
93
+
94
  titlemd = gr.Markdown('# ['+itemdic['Link']+']('+itemdic['URL']+')',visible=True)
95
 
96
  if pd.notnull(itemdic['Authors']):
 
107
  if len(screenshots) > 0:
108
  gallery = gr.Gallery(screenshots, visible=True)
109
 
110
+ return [modal, titlemd, authormd, tagsmd, abstractmd, modelsmd, datasetmd, gallery]
111
 
112
  with gr.Blocks(title = "Social Impact Measurement V2", css=custom_css, theme=gr.themes.Base()) as demo: #theme=gr.themes.Soft(),
113
  # create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
 
120
  gr.Markdown("""
121
  #### Technical Base System Evaluations:
122
 
123
+ Below we list the aspects possible to evaluate in a generative system. Context-absent evaluations only provide narrow insights into the described aspects of the type of generative AI system. The depth of literature and research on evaluations differs by modality, with some modalities having sparse or no relevant literature, but the themes for evaluations can be applied to most systems.
124
 
125
  The following categories are high-level, non-exhaustive, and present a synthesis of the findings across different modalities. They refer solely to what can be evaluated in a base technical system:
126
 
127
  """)
128
  with gr.Tabs(elem_classes="tab-buttons") as tabs1:
129
+ with gr.TabItem("Bias/Stereotypes"):
130
  fulltable = globaldf[globaldf['Group'] == 'BiasEvals']
131
+ fulltable = fulltable[['Modality','Level', 'Suggested Evaluation', 'What it is evaluating', 'Considerations', 'Link']]
132
 
133
  gr.Markdown("""
134
+ Generative AI systems can perpetuate harmful biases from various sources, including systemic, human, and statistical biases. These biases, also known as "fairness" considerations, can manifest in the final system due to choices made throughout the development process. They include harmful associations and stereotypes related to protected classes, such as race, gender, and sexuality. Evaluating biases involves assessing correlations, co-occurrences, sentiment, and toxicity across different modalities, both within the model itself and in the outputs of downstream tasks.
135
  """)
136
  with gr.Row():
137
  modality_filter = gr.CheckboxGroup(["Text", "Image", "Audio", "Video"],
 
140
  show_label=True,
141
  # info="Which modality to show."
142
  )
143
+ level_filter = gr.CheckboxGroup(["Model", "Dataset", "Output", "Taxonomy"],
144
  value=["Model", "Dataset", "Output", "Taxonomy"],
145
+ label="Level",
146
  show_label=True,
147
  # info="Which modality to show."
148
  )
149
  with gr.Row():
150
+ table_full = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=False, interactive=False)
151
+ table_filtered = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=True, interactive=False)
152
+ modality_filter.change(filter_modality_level, inputs=[table_full, modality_filter, level_filter], outputs=table_filtered)
153
+ level_filter.change(filter_modality_level, inputs=[table_full, modality_filter, level_filter], outputs=table_filtered)
154
 
155
 
156
  with Modal(visible=False) as modal:
 
162
  gr.Markdown("### What it is evaluating", visible=True)
163
  gr.Markdown('## Resources', visible=True)
164
  gr.Markdown('### What you need to do this evaluation', visible=True)
165
+ modelsmd = gr.Markdown(visible=False)
166
  datasetmd = gr.Markdown(visible=False)
167
  gr.Markdown("## Results", visible=True)
168
  gr.Markdown("### Metrics", visible=True)
169
  gallery = gr.Gallery(visible=False)
170
+ table_filtered.select(showmodal, None, [modal, titlemd, authormd, tagsmd, abstractmd, modelsmd, datasetmd, gallery])
171
 
172
 
173
 
174
  with gr.TabItem("Cultural Values/Sensitive Content"):
175
+ fulltable = globaldf[globaldf['Group'] == 'CulturalEvals']
176
+ fulltable = fulltable[['Modality','Level', 'Suggested Evaluation', 'What it is evaluating', 'Considerations', 'Link']]
177
+
178
+ gr.Markdown("""Cultural values are specific to groups and sensitive content is normative. Sensitive topics also vary by culture and can include hate speech. What is considered a sensitive topic, such as egregious violence or adult sexual content, can vary widely by viewpoint. Due to norms differing by culture, region, and language, there is no standard for what constitutes sensitive content.
179
+ Distinct cultural values present a challenge for deploying models into a global sphere, as what may be appropriate in one culture may be unsafe in others. Generative AI systems cannot be neutral or objective, nor can they encompass truly universal values. There is no “view from nowhere”; in quantifying anything, a particular frame of reference is imposed.
180
+ """)
181
  with gr.Row():
182
+ modality_filter = gr.CheckboxGroup(["Text", "Image", "Audio", "Video"],
183
+ value=["Text", "Image", "Audio", "Video"],
184
+ label="Modality",
185
+ show_label=True,
186
+ # info="Which modality to show."
187
+ )
188
+ level_filter = gr.CheckboxGroup(["Model", "Dataset", "Output", "Taxonomy"],
189
+ value=["Model", "Dataset", "Output", "Taxonomy"],
190
+ label="Level",
191
+ show_label=True,
192
+ # info="Which modality to show."
193
+ )
194
+ with gr.Row():
195
+ table_full = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=False, interactive=False)
196
+ table_filtered = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=True, interactive=False)
197
+ modality_filter.change(filter_modality_level, inputs=[table_full, modality_filter, level_filter], outputs=table_filtered)
198
+ level_filter.change(filter_modality_level, inputs=[table_full, modality_filter, level_filter], outputs=table_filtered)
199
+
200
+
201
+ with Modal(visible=False) as modal:
202
+ titlemd = gr.Markdown(visible=False)
203
+ authormd = gr.Markdown(visible=False)
204
+ tagsmd = gr.Markdown(visible=False)
205
+ abstractmd = gr.Markdown(visible=False)
206
+ modelsmd = gr.Markdown(visible=False)
207
+ datasetmd = gr.Markdown(visible=False)
208
+ gallery = gr.Gallery(visible=False)
209
+ table_filtered.select(showmodal, None, [modal, titlemd, authormd, tagsmd, abstractmd, modelsmd, datasetmd, gallery])
210
+
211
+
212
 
213
  # with gr.TabItem("Disparate Performance"):
214
  # with gr.Row():
215
  # gr.Image()
216
 
217
  with gr.TabItem("Privacy/Data Protection"):
218
+ fulltable = globaldf[globaldf['Group'] == 'PrivacyEvals']
219
+ fulltable = fulltable[['Modality','Level', 'Suggested Evaluation', 'What it is evaluating', 'Considerations', 'Link']]
220
+
221
+ gr.Markdown("""Cultural values are specific to groups and sensitive content is normative. Sensitive topics also vary by culture and can include hate speech. What is considered a sensitive topic, such as egregious violence or adult sexual content, can vary widely by viewpoint. Due to norms differing by culture, region, and language, there is no standard for what constitutes sensitive content.
222
+ Distinct cultural values present a challenge for deploying models into a global sphere, as what may be appropriate in one culture may be unsafe in others. Generative AI systems cannot be neutral or objective, nor can they encompass truly universal values. There is no “view from nowhere”; in quantifying anything, a particular frame of reference is imposed.
223
+ """)
224
  with gr.Row():
225
+ modality_filter = gr.CheckboxGroup(["Text", "Image", "Audio", "Video"],
226
+ value=["Text", "Image", "Audio", "Video"],
227
+ label="Modality",
228
+ show_label=True,
229
+ # info="Which modality to show."
230
+ )
231
+ level_filter = gr.CheckboxGroup(["Model", "Dataset", "Output", "Taxonomy"],
232
+ value=["Model", "Dataset", "Output", "Taxonomy"],
233
+ label="Level",
234
+ show_label=True,
235
+ # info="Which modality to show."
236
+ )
237
+ with gr.Row():
238
+ table_full = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=False, interactive=False)
239
+ table_filtered = gr.DataFrame(value=fulltable, wrap=True, datatype="markdown", visible=True, interactive=False)
240
+ modality_filter.change(filter_modality_level, inputs=[table_full, modality_filter, level_filter], outputs=table_filtered)
241
+ level_filter.change(filter_modality_level, inputs=[table_full, modality_filter, level_filter], outputs=table_filtered)
242
+
243
+
244
+ with Modal(visible=False) as modal:
245
+ titlemd = gr.Markdown(visible=False)
246
+ authormd = gr.Markdown(visible=False)
247
+ tagsmd = gr.Markdown(visible=False)
248
+ abstractmd = gr.Markdown(visible=False)
249
+ modelsmd = gr.Markdown(visible=False)
250
+ datasetmd = gr.Markdown(visible=False)
251
+ gallery = gr.Gallery(visible=False)
252
+ table_filtered.select(showmodal, None, [modal, titlemd, authormd, tagsmd, abstractmd, modelsmd, datasetmd, gallery])
253
 
254
  # with gr.TabItem("Financial Costs"):
255
  # with gr.Row():
configs/crowspairs.yaml CHANGED
@@ -14,6 +14,6 @@ Screenshots:
14
  - Images/CrowsPairs1.png
15
  - Images/CrowsPairs2.png
16
  Suggested Evaluation: CrowS-Pairs
17
- Type: Dataset
18
  URL: https://arxiv.org/abs/2010.00133
19
  What it is evaluating: Protected class stereotypes
 
14
  - Images/CrowsPairs1.png
15
  - Images/CrowsPairs2.png
16
  Suggested Evaluation: CrowS-Pairs
17
+ Level: Dataset
18
  URL: https://arxiv.org/abs/2010.00133
19
  What it is evaluating: Protected class stereotypes
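For reference, configs like this one are what the loop at the top of app.py turns into rows of globaldf. The loading loop is truncated in the hunk above, so the sketch below is an assumed reading of the pattern (folder name and yaml.safe_load usage inferred from the shown code), not the commit's exact implementation.

```python
import os
import yaml
import pandas as pd

folder_path = "configs"
data_list = []

# Assumed pattern: each YAML config becomes one row of the evaluation table.
for filename in os.listdir(folder_path):
    if filename.endswith(".yaml"):
        with open(os.path.join(folder_path, filename)) as f:
            data_list.append(yaml.safe_load(f))

globaldf = pd.DataFrame(data_list)
print(globaldf[["Modality", "Level", "Suggested Evaluation"]].head())
```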
configs/homoglyphbias.yaml DELETED
@@ -1,16 +0,0 @@
1
- Abstract: .nan
2
- Applicable Models: .nan
3
- Authors: .nan
4
- Considerations: .nan
5
- Datasets: .nan
6
- Group: BiasEvals
7
- Hashtags: .nan
8
- Link: Exploiting Cultural Biases via Homoglyphs in Text-to-Image Synthesis
9
- Modality: Image
10
- Screenshots: []
11
- Suggested Evaluation: Effect of different scripts on text-to-image generation
12
- Type: Output
13
- URL: https://arxiv.org/pdf/2209.08891.pdf
14
- What it is evaluating: It evaluates generated images for cultural stereotypes, when
15
- using different scripts (homoglyphs). It somewhat measures the susceptibility of
16
- a model to produce cultural stereotypes by simply switching the script
configs/honest.yaml CHANGED
@@ -11,6 +11,6 @@ Link: 'HONEST: Measuring Hurtful Sentence Completion in Language Models'
11
  Modality: Text
12
  Screenshots: []
13
  Suggested Evaluation: 'HONEST: Measuring Hurtful Sentence Completion in Language Models'
14
- Type: Output
15
  URL: https://aclanthology.org/2021.naacl-main.191.pdf
16
  What it is evaluating: Protected class stereotypes and hurtful language
 
11
  Modality: Text
12
  Screenshots: []
13
  Suggested Evaluation: 'HONEST: Measuring Hurtful Sentence Completion in Language Models'
14
+ Level: Output
15
  URL: https://aclanthology.org/2021.naacl-main.191.pdf
16
  What it is evaluating: Protected class stereotypes and hurtful language
configs/ieat.yaml CHANGED
@@ -12,6 +12,6 @@ Link: Image Representations Learned With Unsupervised Pre-Training Contain Human
12
  Modality: Image
13
  Screenshots: []
14
  Suggested Evaluation: Image Embedding Association Test (iEAT)
15
- Type: Model
16
  URL: https://dl.acm.org/doi/abs/10.1145/3442188.3445932
17
  What it is evaluating: Embedding associations
 
12
  Modality: Image
13
  Screenshots: []
14
  Suggested Evaluation: Image Embedding Association Test (iEAT)
15
+ Level: Model
16
  URL: https://dl.acm.org/doi/abs/10.1145/3442188.3445932
17
  What it is evaluating: Embedding associations
configs/imagedataleak.yaml CHANGED
@@ -10,6 +10,6 @@ Link: 'Balanced Datasets Are Not Enough: Estimating and Mitigating Gender Bias i
10
  Modality: Image
11
  Screenshots: []
12
  Suggested Evaluation: Dataset leakage and model leakage
13
- Type: Dataset
14
  URL: https://arxiv.org/abs/1811.08489
15
  What it is evaluating: Gender and label bias
 
10
  Modality: Image
11
  Screenshots: []
12
  Suggested Evaluation: Dataset leakage and model leakage
13
+ Level: Dataset
14
  URL: https://arxiv.org/abs/1811.08489
15
  What it is evaluating: Gender and label bias
configs/measuringforgetting.yaml ADDED
@@ -0,0 +1,19 @@
1
+ Abstract: "Machine learning models exhibit two seemingly contradictory phenomena: training data memorization, and various forms of forgetting. In memorization, models overfit specific training examples and become susceptible to privacy attacks. In forgetting, examples which appeared early in training are forgotten by the end. In this work, we connect these phenomena. We propose a technique to measure to what extent models \"forget\" the specifics of training examples, becoming less susceptible to privacy attacks on examples they have not seen recently. We show that, while non-convex models can memorize data forever in the worst-case, standard image, speech, and language models empirically do forget examples over time. We identify nondeterminism as a potential explanation, showing that deterministically trained models do not forget. Our results suggest that examples seen early when training with extremely large datasets - for instance those examples used to pre-train a model - may observe privacy benefits at the expense of examples seen later."
2
+ Applicable Models:
3
+ - ResNet (Image)
4
+ - Conformer (Audio)
5
+ - T5 (Text)
6
+ Authors: Matthew Jagielski, Om Thakkar, Florian Tramèr, Daphne Ippolito, Katherine Lee, Nicholas Carlini, Eric Wallace, Shuang Song, Abhradeep Thakurta, Nicolas Papernot, Chiyuan Zhang
7
+ Considerations: .nan
8
+ Datasets: .nan
9
+ Group: PrivacyEvals
10
+ Hashtags: .nan
11
+ Link: 'Measuring Forgetting of Memorized Training Examples'
12
+ Modality: Text + Image + Audio
13
+ Screenshots:
14
+ - Images/Forgetting1.png
15
+ - Images/Forgetting2.png
16
+ Suggested Evaluation: Measuring forgetting of training examples
17
+ Level: Model
18
+ URL: https://arxiv.org/pdf/2207.00099.pdf
19
+ What it is evaluating: Measure whether models forget training examples over time, over different types of models (image, audio, text) and how order of training affects privacy attacks
configs/notmyvoice.yaml CHANGED
@@ -11,6 +11,6 @@ Modality: Audio
11
  Screenshots: []
12
  Suggested Evaluation: Not My Voice! A Taxonomy of Ethical and Safety Harms of Speech
13
  Generators
14
- Type: Taxonomy
15
  URL: https://arxiv.org/pdf/2402.01708.pdf
16
  What it is evaluating: Lists harms of audio/speech generators
 
11
  Screenshots: []
12
  Suggested Evaluation: Not My Voice! A Taxonomy of Ethical and Safety Harms of Speech
13
  Generators
14
+ Level: Taxonomy
15
  URL: https://arxiv.org/pdf/2402.01708.pdf
16
  What it is evaluating: Lists harms of audio/speech generators
configs/palms.yaml ADDED
@@ -0,0 +1,14 @@
1
+ Abstract: "Language models can generate harmful and biased outputs and exhibit undesirable behavior according to a given cultural context. We propose a Process for Adapting Language Models to Society (PALMS) with Values-Targeted Datasets, an iterative process to significantly change model behavior by crafting and fine-tuning on a dataset that reflects a predetermined set of target values. We evaluate our process using three metrics: quantitative metrics with human evaluations that score output adherence to a target value, toxicity scoring on outputs; and qualitative metrics analyzing the most common word associated with a given social category. Through each iteration, we add additional training dataset examples based on observed shortcomings from evaluations. PALMS performs significantly better on all metrics compared to baseline and control models for a broad range of GPT-3 language model sizes without compromising capability integrity. We find that the effectiveness of PALMS increases with model size. We show that significantly adjusting language model behavior is feasible with a small, hand-curated dataset."
2
+ Applicable Models: .nan
3
+ Authors: Irene Solaiman, Christy Dennison
4
+ Considerations: Requires predefining what adherence to a culture means for human evals
5
+ Datasets: .nan
6
+ Group: CulturalEvals
7
+ Hashtags: .nan
8
+ Link: 'Process for Adapting Language Models to Society (PALMS) with Values-Targeted Datasets'
9
+ Modality: Text
10
+ Screenshots: .nan
11
+ Suggested Evaluation: Human and Toxicity Evals of Cultural Value Categories
12
+ Level: Output
13
+ URL: http://arxiv.org/abs/2106.10328
14
+ What it is evaluating: Adherence to defined norms for a set of cultural categories
configs/safelatentdiff.yaml ADDED
@@ -0,0 +1,17 @@
1
+ Abstract: "Text-conditioned image generation models have recently achieved astonishing results in image quality and text alignment and are consequently employed in a fast-growing number of applications. Since they are highly data-driven, relying on billion-sized datasets randomly scraped from the internet, they also suffer, as we demonstrate, from degenerated and biased human behavior. In turn, they may even reinforce such biases. To help combat these undesired side effects, we present safe latent diffusion (SLD). Specifically, to measure the inappropriate degeneration due to unfiltered and imbalanced training sets, we establish a novel image generation test bed-inappropriate image prompts (I2P)-containing dedicated, real-world image-to-text prompts covering concepts such as nudity and violence. As our exhaustive empirical evaluation demonstrates, the introduced SLD removes and suppresses inappropriate image parts during the diffusion process, with no additional training required and no adverse effect on overall image quality or text alignment."
2
+ Applicable Models:
3
+ - Stable Diffusion
4
+ Authors: Patrick Schramowski, Manuel Brack, Björn Deiseroth, Kristian Kersting
5
+ Considerations: What is considered appropriate and inappropriate varies strongly across cultures and is very context dependent
6
+ Datasets: https://huggingface.co/datasets/AIML-TUDA/i2p
7
+ Group: CulturalEvals
8
+ Hashtags: .nan
9
+ Link: 'Safe Latent Diffusion: Mitigating Inappropriate Degeneration in Diffusion Models'
10
+ Modality: Image
11
+ Screenshots:
12
+ - Images/SLD1.png
13
+ - Images/SLD2.png
14
+ Suggested Evaluation: Evaluating text-to-image models for safety
15
+ Level: Output
16
+ URL: https://arxiv.org/pdf/2211.05105.pdf
17
+ What it is evaluating: Generating images for a diverse set of prompts (novel I2P benchmark) and investigating how often e.g. violent/nude images will be generated. There is a distinction between implicit and explicit safety, i.e. unsafe results with “normal” prompts.
configs/stablebias.yaml CHANGED
@@ -9,6 +9,6 @@ Link: 'Stable bias: Analyzing societal representations in diffusion models'
9
  Modality: Image
10
  Screenshots: []
11
  Suggested Evaluation: Characterizing the variation in generated images
12
- Type: Output
13
  URL: https://arxiv.org/abs/2303.11408
14
  What it is evaluating: .nan
 
9
  Modality: Image
10
  Screenshots: []
11
  Suggested Evaluation: Characterizing the variation in generated images
12
+ Level: Output
13
  URL: https://arxiv.org/abs/2303.11408
14
  What it is evaluating: .nan
configs/stereoset.yaml DELETED
@@ -1,16 +0,0 @@
1
- Abstract: .nan
2
- Applicable Models: .nan
3
- Authors: .nan
4
- Considerations: Automating stereotype detection makes distinguishing harmful stereotypes
5
- difficult. It also raises many false positives and can flag relatively neutral associations
6
- based in fact (e.g. population x has a high proportion of lactose intolerant people).
7
- Datasets: .nan
8
- Group: BiasEvals
9
- Hashtags: .nan
10
- Link: 'StereoSet: Measuring stereotypical bias in pretrained language models'
11
- Modality: Text
12
- Screenshots: []
13
- Suggested Evaluation: StereoSet
14
- Type: Dataset
15
- URL: https://arxiv.org/abs/2004.09456
16
- What it is evaluating: Protected class stereotypes
configs/tango.yaml ADDED
@@ -0,0 +1,19 @@
1
+ Abstract: "Transgender and non-binary (TGNB) individuals disproportionately experience discrimination and exclusion from daily life. Given the recent popularity and adoption of language generation technologies, the potential to further marginalize this population only grows. Although a multitude of NLP fairness literature focuses on illuminating and addressing gender biases, assessing gender harms for TGNB identities requires understanding how such identities uniquely interact with societal gender norms and how they differ from gender binary-centric perspectives. Such measurement frameworks inherently require centering TGNB voices to help guide the alignment between gender-inclusive NLP and whom they are intended to serve. Towards this goal, we ground our work in the TGNB community and existing interdisciplinary literature to assess how the social reality surrounding experienced marginalization of TGNB persons contributes to and persists within Open Language Generation (OLG). This social knowledge serves as a guide for evaluating popular large language models (LLMs) on two key aspects: (1) misgendering and (2) harmful responses to gender disclosure. To do this, we introduce TANGO, a dataset of template-based real-world text curated from a TGNB-oriented community. We discover a dominance of binary gender norms reflected by the models; LLMs least misgendered subjects in generated text when triggered by prompts whose subjects used binary pronouns. Meanwhile, misgendering was most prevalent when triggering generation with singular they and neopronouns. When prompted with gender disclosures, TGNB disclosure generated the most stigmatizing language and scored most toxic, on average. Our findings warrant further research on how TGNB harms manifest in LLMs and serve as a broader case study toward concretely grounding the design of gender-inclusive AI in community voices and interdisciplinary literature."
2
+ Applicable Models:
3
+ - GPT-2
4
+ - GPT-Neo
5
+ - OPT
6
+ Authors: Anaelia Ovalle, Palash Goyal, Jwala Dhamala, Zachary Jaggers, Kai-Wei Chang, Aram Galstyan, Richard Zemel, Rahul Gupta
7
+ Considerations: Based on automatic evaluations of the resulting open language generation; may be sensitive to the choice of evaluator. Would advise using a combination of Perspective, Detoxify, and regard metrics
8
+ Datasets: https://huggingface.co/datasets/AlexaAI/TANGO
9
+ Group: CulturalEvals
10
+ Hashtags: .nan
11
+ Link: '“I’m fully who I am”: Towards Centering Transgender and Non-Binary Voices to Measure Biases in Open Language Generation'
12
+ Modality: Text
13
+ Screenshots:
14
+ - Images/TANGO1.png
15
+ - Images/TANGO2.png
16
+ Suggested Evaluation: Human and Toxicity Evals of Cultural Value Categories
17
+ Level: Output
18
+ URL: http://arxiv.org/abs/2106.10328
19
+ What it is evaluating: Bias measurement for the trans and non-binary community via measuring gender non-affirmative language, specifically 1) misgendering and 2) negative responses to gender disclosure
configs/videodiversemisinfo.yaml CHANGED
@@ -13,7 +13,7 @@ Modality: Video
13
  Screenshots: []
14
  Suggested Evaluation: 'Diverse Misinformation: Impacts of Human Biases on Detection
15
  of Deepfakes on Networks'
16
- Type: Output
17
  URL: https://arxiv.org/abs/2210.10026
18
  What it is evaluating: Human led evaluations of deepfakes to understand susceptibility
19
  and representational harms (including political violence)
 
13
  Screenshots: []
14
  Suggested Evaluation: 'Diverse Misinformation: Impacts of Human Biases on Detection
15
  of Deepfakes on Networks'
16
+ Level: Output
17
  URL: https://arxiv.org/abs/2210.10026
18
  What it is evaluating: Human led evaluations of deepfakes to understand susceptibility
19
  and representational harms (including political violence)
configs/weat.yaml CHANGED
@@ -36,7 +36,7 @@ Screenshots:
36
  - Images/WEAT1.png
37
  - Images/WEAT2.png
38
  Suggested Evaluation: Word Embedding Association Test (WEAT)
39
- Type: Model
40
  URL: https://researchportal.bath.ac.uk/en/publications/semantics-derived-automatically-from-language-corpora-necessarily
41
  What it is evaluating: Associations and word embeddings based on Implicit Associations
42
  Test (IAT)
 
36
  - Images/WEAT1.png
37
  - Images/WEAT2.png
38
  Suggested Evaluation: Word Embedding Association Test (WEAT)
39
+ Level: Model
40
  URL: https://researchportal.bath.ac.uk/en/publications/semantics-derived-automatically-from-language-corpora-necessarily
41
  What it is evaluating: Associations and word embeddings based on Implicit Associations
42
  Test (IAT)