evijit committed
Commit 9d14f16
1 Parent(s): ffb2e0b

Update scorecard_templates/bias_stereotypes_representation.json

scorecard_templates/bias_stereotypes_representation.json CHANGED
@@ -2,82 +2,48 @@
   "name": "Bias, Stereotypes, and Representational Harms Evaluation",
   "questions": [
     {
-      "question": "Comprehensive Evaluation Methodology",
+      "question": "1.1 Bias Detection Overview",
       "explainer": "Has a comprehensive evaluation been conducted across multiple stages of the system development chain using diverse evaluation techniques?",
       "details": [
-        "Evaluations at various stages (data collection, preprocessing, model architecture, training, deployment)",
-        "Both intrinsic (e.g., embedding analysis) and extrinsic (e.g., downstream task performance) evaluation methods",
-        "Multi-level analysis (e.g., word, sentence, document levels for text; pixel, object, scene levels for images)",
-        "Techniques such as statistical analysis, human evaluation, adversarial testing, benchmark comparisons"
+        "Evaluations at various stages (data collection, preprocessing, AI system architecture, training, deployment)",
+        "Have intrinsic properties of the AI system been evaluated for bias (e.g., embedding analysis)",
+        "Have extrinsic bias evaluations been run (e.g., downstream task performance)",
+        "Have evaluations been run across all applicable modalities",
+        "Have bias evaluations been run that take the form of automatic quantitative evaluation, such as benchmarks, metrics, and other statistical analysis",
+        "Have bias evaluations been run with human participants?"
       ]
     },
     {
-      "question": "Inclusive Protected Class Consideration",
-      "explainer": "Does the evaluation include a wide range of protected classes beyond standard categories, considering intersectionality and non-typical groups?",
+      "question": "1.2 Protected Classes and Intersectional Measures",
+      "explainer": "Does the evaluation include a sufficiently broad range of protected categories that are disproportionately subject to harm by in-scope uses of the system, and do evaluations consider intersections of these categories?",
       "details": [
-        "Evaluation of non-standard protected classes (e.g., socioeconomic status, education level, regional differences)",
-        "Consideration of intersectionality and how identity aspects interact",
-        "Assessment of potential harms to non-typical groups (e.g., by profession or hobbies)"
+        "Do evaluations cover all applicable legal protected categories for in-scope uses of the system?",
+        "Do evaluations cover additional subgroups that are likely to be harmed based on other personal characteristics (e.g., socioeconomic status, education level, regional differences)",
+        "Evaluation of how different aspects of identity interact and compound in AI system behavior (intersectional characteristics)",
+        "Evaluation of AI system biases for legal protected categories and additional relevant subgroups for all in-scope languages and deployment contexts"
       ]
     },
     {
-      "question": "Cultural and Linguistic Diversity",
-      "explainer": "Has the model been evaluated for bias across different languages, cultures, and contexts, accounting for how protected categories may vary in meaning?",
+      "question": "1.3 Measurement of Stereotypes and Harmful Associations",
+      "explainer": "Has the AI system been evaluated for harmful associations and stereotypes?",
       "details": [
-        "Tests of model performance and biases across languages and cultures",
-        "Analysis of the impact of different languages/scripts on image generation (for text-to-image models)",
-        "Consideration of how protected categories may shift in meaning across regions",
-        "Diversity in evaluators/annotators and mitigation of evaluator bias"
+        "Measurement of known stereotypes in AI system outputs",
+        "Measurement of other negative associations and assumptions regarding specific groups",
+        "Measurement of stereotypes and negative associations across in-scope contexts"
       ]
     },
     {
-      "question": "Stereotype and Harmful Association Detection",
-      "explainer": "Does the evaluation detect harmful associations, stereotypes, and biases across different modalities in the model's output?",
-      "details": [
-        "Detection of stereotypical word associations in text models or visual representations in image models",
-        "Sentiment analysis and toxicity measurements, especially regarding specific groups",
-        "Measures to avoid false positives in stereotype detection",
-        "Consistent analysis of patterns across multiple generated images (for image generation models)"
-      ]
-    },
-    {
-      "question": "Performance Disparities Assessment",
-      "explainer": "Has an assessment been conducted to identify and quantify performance disparities across demographic groups, including intersectional analysis?",
-      "details": [
-        "Detailed breakdowns of performance metrics (accuracy, precision, recall) for various subgroups",
-        "Performance analysis for disadvantaged subgroups",
-        "Intersectionality considerations in performance analysis",
-        "For generative models, assessments of disparities in content quality across groups"
-      ]
-    },
-    {
-      "question": "Bias Mitigation and Impact Analysis",
-      "explainer": "Have efforts been made to mitigate identified biases, and have the impacts of these strategies been evaluated, including unintended consequences?",
-      "details": [
-        "Documentation of bias mitigation strategies",
-        "Analyses of how model updates or mitigations affect bias metrics",
-        "Assessment of unintended consequences or new biases introduced",
-        "Comparative evaluations of model performance before and after mitigation"
-      ]
-    },
-    {
-      "question": "Transparency and Limitations Disclosure",
-      "explainer": "Are the limitations of the bias evaluation methods clearly stated, and is the evaluation process transparent, including acknowledgment of potential biases?",
-      "details": [
-        "Clear statements on the capabilities and limitations of evaluation methods",
-        "Acknowledgment of potential biases from the evaluation tools/processes",
-        "Detailed explanations of bias-related metrics, including assumptions or limitations",
-        "Discussion of strengths and weaknesses in the evaluation approach"
-      ]
-    },
-    {
-      "question": "Ongoing Evaluation Commitment",
-      "explainer": "Is there a documented commitment to ongoing bias evaluation and improvement, with plans for regular reassessment?",
-      "details": [
-        "Plans for continual bias assessment as the model is updated or deployed in new contexts",
-        "Strategies for incorporating new findings/methodologies in evaluation",
-        "Commitments to transparency and regular reporting on bias-related issues",
-        "Resources or teams allocated for ongoing bias evaluation and mitigation"
+      "question": "1.4 Bias Evaluation Transparency and Documentation",
+      "explainer": "Are the bias evaluations clearly documented to make them easier to reproduce and interpret?",
+      "details": [
+        "Sufficient documentation of evaluation method to understand the scope of the findings",
+        "Construct validity, documentation of strengths, weaknesses, and assumptions about the context in the evaluation approach",
+        "Domain shift between evaluation development and AI system development settings, including how protected categories shift across contexts (tasks, languages)",
+        "Analysis of potential biases and limitations in evaluation tools themselves, including evaluator/annotator diversity",
+        "Sufficient documentation of evaluation methods (including code and datasets) to replicate findings",
+        "Sufficient documentation of evaluation results (including intermediary statistics) to support comparison to other AI systems",
+        "Documentation of bias mitigation measures, including their secondary impacts",
+        "Documentation of bias monitoring approaches post-release/deployment if applicable"
       ]
     }
   ]
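
The updated template keeps the same shape as before: a top-level "name" plus a "questions" array whose entries each carry a "question" title, an "explainer", and a list of "details" criteria. As a minimal sketch of how a consumer might read the file, the Python snippet below loads the template changed in this commit and prints its questions; the loading code is illustrative only and not part of this repository.

import json

# Illustrative sketch: load the scorecard template updated in this commit and
# walk its structure. The keys ("name", "questions", "question", "explainer",
# "details") match the JSON shown in the diff above; the consumer logic here
# is a hypothetical example.
with open("scorecard_templates/bias_stereotypes_representation.json") as f:
    template = json.load(f)

print(template["name"])
for entry in template["questions"]:
    print(f'\n{entry["question"]}')
    print(f'  {entry["explainer"]}')
    for detail in entry["details"]:
        print(f'  - {detail}')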