NimaBoscarino committed on
Commit
aae10fc
·
1 Parent(s): 25bf2cc

WIP: Dynamic block-based results, funky reporting

Browse files
Files changed (3) hide show
  1. app.py +61 -18
  2. compliance_checks.py +177 -27
  3. tests/test_compliance_checks.py +31 -18
app.py CHANGED
@@ -1,7 +1,9 @@
1
  import gradio as gr
 
2
 
3
  from compliance_checks import (
4
  ComplianceSuite,
 
5
  ModelProviderIdentityCheck,
6
  IntendedPurposeCheck,
7
  GeneralLimitationsCheck,
@@ -10,42 +12,83 @@ from compliance_checks import (
10
 
11
  from bloom_card import bloom_card
12
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- def run_compliance_check(model_card: str):
15
- suite = ComplianceSuite(checks=[
16
- ModelProviderIdentityCheck(),
17
- IntendedPurposeCheck(),
18
- GeneralLimitationsCheck(),
19
- ComputationalRequirementsCheck(),
20
- ])
21
 
 
22
  results = suite.run(model_card)
23
 
24
- return str([r[0] for r in results])
 
 
 
 
 
 
 
 
25
 
26
 
27
- with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
28
  gr.Markdown("""\
29
  # Model Card Validator
30
  Following Article 13 of the EU AI Act
31
  """)
32
 
33
- with gr.Row():
34
- with gr.Column():
35
- model_card_box = gr.TextArea()
36
- populate_sample = gr.Button(value="Populate Sample")
37
- submit = gr.Button()
 
38
 
39
  with gr.Column():
40
- results_list = gr.Text()
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- submit.click(
43
  fn=run_compliance_check,
44
  inputs=[model_card_box],
45
- outputs=[results_list]
 
 
 
 
 
 
46
  )
47
 
48
- populate_sample.click(
49
  fn=lambda: bloom_card,
50
  inputs=[],
51
  outputs=[model_card_box]
 
1
  import gradio as gr
2
+ from huggingface_hub import ModelCard
3
 
4
  from compliance_checks import (
5
  ComplianceSuite,
6
+ ComplianceCheck,
7
  ModelProviderIdentityCheck,
8
  IntendedPurposeCheck,
9
  GeneralLimitationsCheck,
 
12
 
13
  from bloom_card import bloom_card
14
 
15
+ checks = [
16
+ ModelProviderIdentityCheck(),
17
+ IntendedPurposeCheck(),
18
+ GeneralLimitationsCheck(),
19
+ ComputationalRequirementsCheck(),
20
+ ]
21
+ suite = ComplianceSuite(checks=checks)
22
+
23
+
24
def status_emoji(status: bool):
    """Return a check-mark emoji for a truthy status, a stop sign otherwise."""
    if status:
        return "✅"
    return "🛑"
26
 
 
 
 
 
 
 
 
27
 
28
def run_compliance_check(model_card: str):
    """Run the module-level suite on a model card and build UI updates.

    Returns a flat list: one accordion update per result (label carries the
    result name and a status emoji), followed by one markdown update per
    result (value is the result's ``to_string()`` text). The ordering matches
    the ``outputs=[*accordions, *descriptions]`` wiring used by the callers.
    """
    results = suite.run(model_card)

    # First pass: accordion headers, in result order.
    accordion_updates = []
    for result in results:
        header = f"{result.name} - {status_emoji(result.status)}"
        accordion_updates.append(gr.Accordion.update(label=header))

    # Second pass: accordion bodies, in the same order.
    description_updates = []
    for result in results:
        description_updates.append(gr.Markdown.update(value=result.to_string()))

    return accordion_updates + description_updates
35
+
36
+
37
def fetch_and_run_compliance_check(model_id: str):
    """Load the model card for ``model_id`` and run the compliance checks on its content."""
    card = ModelCard.load(repo_id_or_path=model_id)
    return run_compliance_check(model_card=card.content)
40
 
41
 
42
def compliance_result(compliance_check: ComplianceCheck):
    """Create the placeholder UI for one check.

    Builds a collapsed accordion labelled with the check's name, containing a
    markdown component with placeholder text, and returns both components as
    ``(accordion, description)``.
    """
    with gr.Accordion(label=f"{compliance_check.name}", open=False) as accordion:
        description = gr.Markdown("Run an evaluation to see results...")

    return accordion, description
48
+
49
+
50
+ with gr.Blocks(css="#reverse-row { flex-direction: row-reverse; }") as demo:
51
  gr.Markdown("""\
52
  # Model Card Validator
53
  Following Article 13 of the EU AI Act
54
  """)
55
 
56
+ with gr.Row(elem_id="reverse-row"):
57
+ with gr.Tab(label="Results"):
58
+ with gr.Column():
59
+ compliance_results = [compliance_result(c) for c in suite.checks]
60
+ compliance_accordions = [c[0] for c in compliance_results]
61
+ compliance_descriptions = [c[1] for c in compliance_results]
62
 
63
  with gr.Column():
64
+ with gr.Tab(label="Markdown"):
65
+ model_card_box = gr.TextArea()
66
+ populate_sample_card = gr.Button(value="Populate Sample")
67
+ submit_markdown = gr.Button()
68
+ with gr.Tab(label="Search for Model"):
69
+ model_id_search = gr.Text()
70
+ submit_model_search = gr.Button()
71
+ gr.Examples(
72
+ examples=["society-ethics/model-card-webhook-test"],
73
+ inputs=[model_id_search],
74
+ outputs=[*compliance_accordions, *compliance_descriptions],
75
+ fn=fetch_and_run_compliance_check,
76
+ # cache_examples=True, # TODO: Why does this break the app?
77
+ )
78
 
79
+ submit_markdown.click(
80
  fn=run_compliance_check,
81
  inputs=[model_card_box],
82
+ outputs=[*compliance_accordions, *compliance_descriptions]
83
+ )
84
+
85
+ submit_model_search.click(
86
+ fn=fetch_and_run_compliance_check,
87
+ inputs=[model_id_search],
88
+ outputs=[*compliance_accordions, *compliance_descriptions]
89
  )
90
 
91
+ populate_sample_card.click(
92
  fn=lambda: bloom_card,
93
  inputs=[],
94
  outputs=[model_card_box]
compliance_checks.py CHANGED
@@ -1,30 +1,10 @@
1
  from abc import ABC, abstractmethod
 
2
 
3
  import markdown
4
  from bs4 import BeautifulSoup, Comment
5
 
6
 
7
- class ComplianceCheck(ABC):
8
- @abstractmethod
9
- def run_check(self, card: BeautifulSoup):
10
- raise NotImplementedError
11
-
12
-
13
- class ModelProviderIdentityCheck(ComplianceCheck):
14
- def run_check(self, card: BeautifulSoup):
15
- try:
16
- developed_by = card.find("strong", string="Developed by:")
17
-
18
- developer = "".join([str(s) for s in developed_by.next_siblings]).strip()
19
-
20
- if developer == "[More Information Needed]":
21
- return False, None
22
-
23
- return True, developer
24
- except AttributeError:
25
- return False, None
26
-
27
-
28
  def walk_to_next_heading(card, heading, heading_text):
29
  stop_at = [heading, f"h{int(heading[1]) - 1}"]
30
 
@@ -49,33 +29,203 @@ def walk_to_next_heading(card, heading, heading_text):
49
  return False, None
50
 
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  class IntendedPurposeCheck(ComplianceCheck):
 
 
53
  def run_check(self, card: BeautifulSoup):
54
  direct_use_check, direct_use_content = walk_to_next_heading(card, "h3", "Direct Use")
55
  # TODO: Handle [optional], which doesn't exist in BLOOM, e.g.
56
  downstream_use_check, downstream_use_content = walk_to_next_heading(card, "h3", "Downstream Use [optional]")
57
  out_of_scope_use_check, out_of_scope_use_content = walk_to_next_heading(card, "h3", "Out-of-Scope Use")
58
- return (
59
- direct_use_check and out_of_scope_use_check,
60
- [direct_use_content, downstream_use_content, out_of_scope_use_content]
 
 
61
  )
62
 
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  class GeneralLimitationsCheck(ComplianceCheck):
 
 
65
  def run_check(self, card: BeautifulSoup):
66
- return walk_to_next_heading(card, "h2", "Bias, Risks, and Limitations")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
 
69
  class ComputationalRequirementsCheck(ComplianceCheck):
 
 
70
  def run_check(self, card: BeautifulSoup):
71
- return walk_to_next_heading(card, "h3", "Compute infrastructure")
 
 
 
 
 
72
 
73
 
74
  class ComplianceSuite:
75
  def __init__(self, checks):
76
  self.checks = checks
77
 
78
- def run(self, model_card):
79
  model_card_html = markdown.markdown(model_card)
80
  card_soup = BeautifulSoup(model_card_html, features="html.parser")
81
 
 
1
  from abc import ABC, abstractmethod
2
+ from typing import Optional, List
3
 
4
  import markdown
5
  from bs4 import BeautifulSoup, Comment
6
 
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def walk_to_next_heading(card, heading, heading_text):
9
  stop_at = [heading, f"h{int(heading[1]) - 1}"]
10
 
 
29
  return False, None
30
 
31
 
32
class ComplianceResult(ABC):
    """Base class for the outcome of a single compliance check.

    Attributes:
        name: Human-readable label for the result; subclasses override it.
        status: Whether the check passed. Defaults to ``False``.
    """

    name: Optional[str] = None

    # Results are mutable and compared by value, so they stay unhashable.
    # (Python already does this implicitly when a class defines only __eq__;
    # spelling it out documents the intent.)
    __hash__ = None

    def __init__(self, status: Optional[bool] = False, *args, **kwargs):
        """Store the pass/fail status; any extra arguments are accepted and ignored."""
        self.status = status

    def __eq__(self, other):
        """Compare two results by ``status``.

        Returns ``NotImplemented`` for objects that are not results, so Python
        falls back to its default comparison instead of raising
        ``AttributeError`` on a missing ``status`` attribute.
        """
        if not isinstance(other, ComplianceResult):
            return NotImplemented
        # Plain comparison instead of assert/except: assert statements are
        # removed under ``python -O``, which made every pair compare equal.
        return self.status == other.status

    @abstractmethod
    def to_string(self):
        """Return a displayable description of the result."""
        return "Not Implemented"
48
+
49
+
50
class ComplianceCheck(ABC):
    """Interface for a single compliance check run against a parsed model card."""

    # Label for the check; the concrete checks in this file override it.
    name: Optional[str] = None

    @abstractmethod
    def run_check(self, card: BeautifulSoup) -> ComplianceResult:
        """Evaluate ``card`` and return the check's result.

        Concrete subclasses must override this; the base version always raises
        ``NotImplementedError``.
        """
        raise NotImplementedError
56
+
57
+
58
class ModelProviderIdentityResult(ComplianceResult):
    """Result of the model-provider-identity check.

    Attributes:
        provider: Text extracted as the model's developer, or ``None`` when
            nothing was found.
    """

    name = "Model Provider Identity"

    def __init__(self, provider: Optional[str] = None, *args, **kwargs):
        """Store ``provider``; remaining arguments (e.g. ``status``) go to the base class."""
        super().__init__(*args, **kwargs)
        self.provider = provider

    def __eq__(self, other):
        """Equal when ``other`` is the same kind of result with equal status and provider.

        Always returns a definite answer: the previous nested if/else left a
        path that fell through and returned ``None``, and its ``assert``-based
        comparison is stripped under ``python -O``.
        """
        if not isinstance(other, ModelProviderIdentityResult):
            return NotImplemented
        # The base class compares ``status``; this adds the subclass field.
        return super().__eq__(other) is True and self.provider == other.provider

    def to_string(self):
        """Return the provider as text (``"None"`` when no provider was found)."""
        return str(self.provider)
78
+
79
+
80
class ModelProviderIdentityCheck(ComplianceCheck):
    """Check that the card names who developed the model."""

    name = "Model Provider Identity"

    def run_check(self, card: BeautifulSoup):
        """Look for a bold ``Developed by:`` label and read the text after it.

        Returns a failing ``ModelProviderIdentityResult`` when the label is
        absent (the lookup raises ``AttributeError``) or when the text is the
        template placeholder; otherwise a passing result carrying the text.
        """
        try:
            label = card.find("strong", string="Developed by:")
            # Everything following the label within its parent is the provider.
            provider_text = "".join(map(str, label.next_siblings)).strip()
        except AttributeError:
            return ModelProviderIdentityResult()

        if provider_text == "[More Information Needed]":
            return ModelProviderIdentityResult()

        return ModelProviderIdentityResult(status=True, provider=provider_text)
95
+
96
+
97
class IntendedPurposeResult(ComplianceResult):
    """Result of the intended-purpose check.

    Attributes:
        direct_use: Content found for the direct-use section, if any.
        downstream_use: Content found for the downstream-use section, if any.
        out_of_scope_use: Content found for the out-of-scope-use section, if any.
    """

    name = "Intended Purpose"

    def __init__(
        self,
        direct_use: Optional[str] = None,
        downstream_use: Optional[str] = None,
        out_of_scope_use: Optional[str] = None,
        *args,
        **kwargs,
    ):
        """Store the three section texts; remaining arguments go to the base class."""
        super().__init__(*args, **kwargs)
        self.direct_use = direct_use
        self.downstream_use = downstream_use
        self.out_of_scope_use = out_of_scope_use

    def __eq__(self, other):
        """Equal when ``other`` is the same kind of result with equal status and sections.

        Always returns a definite answer: the previous nested if/else left a
        path that fell through and returned ``None``, and its ``assert``-based
        comparison is stripped under ``python -O``.
        """
        if not isinstance(other, IntendedPurposeResult):
            return NotImplemented
        # The base class compares ``status``; this adds the subclass fields.
        return super().__eq__(other) is True and (
            self.direct_use == other.direct_use
            and self.downstream_use == other.downstream_use
            and self.out_of_scope_use == other.out_of_scope_use
        )

    def to_string(self):
        """Return the three sections as the text of a 3-tuple.

        The middle element is ``downstream_use``; it previously repeated
        ``direct_use``, so the downstream section was never shown.
        """
        return str((self.direct_use, self.downstream_use, self.out_of_scope_use))
128
+
129
+
130
class IntendedPurposeCheck(ComplianceCheck):
    """Check that the card describes the model's intended uses."""

    name = "Intended Purpose"

    def run_check(self, card: BeautifulSoup):
        """Collect the three use sections and report on the mandatory ones.

        The result passes only when both the direct-use and out-of-scope-use
        lookups succeed; the downstream-use text is recorded but its lookup
        flag does not affect the status.
        """
        has_direct, direct_text = walk_to_next_heading(card, "h3", "Direct Use")
        # TODO: Handle [optional], which doesn't exist in BLOOM, e.g.
        _has_downstream, downstream_text = walk_to_next_heading(card, "h3", "Downstream Use [optional]")
        has_out_of_scope, out_of_scope_text = walk_to_next_heading(card, "h3", "Out-of-Scope Use")

        passed = has_direct and has_out_of_scope
        return IntendedPurposeResult(
            status=passed,
            direct_use=direct_text,
            downstream_use=downstream_text,
            out_of_scope_use=out_of_scope_text,
        )
144
 
145
 
146
class GeneralLimitationsResult(ComplianceResult):
    """Result of the general-limitations check.

    Attributes:
        limitations: Content found for the limitations section, or ``None``.
    """

    name = "General Limitations"

    def __init__(
        self,
        limitations: Optional[str] = None,
        *args,
        **kwargs,
    ):
        """Store ``limitations``; remaining arguments go to the base class."""
        super().__init__(*args, **kwargs)
        self.limitations = limitations

    def __eq__(self, other):
        """Equal when ``other`` is the same kind of result with equal status and limitations.

        Always returns a definite answer: the previous nested if/else left a
        path that fell through and returned ``None``, and its ``assert``-based
        comparison is stripped under ``python -O``.
        """
        if not isinstance(other, GeneralLimitationsResult):
            return NotImplemented
        # The base class compares ``status``; this adds the subclass field.
        return super().__eq__(other) is True and self.limitations == other.limitations

    def to_string(self):
        """Return the stored limitations text unchanged (``None`` when absent)."""
        return self.limitations
171
+
172
+
173
class GeneralLimitationsCheck(ComplianceCheck):
    """Check that the card has a "Bias, Risks, and Limitations" section."""

    name = "General Limitations"

    def run_check(self, card: BeautifulSoup):
        """Look up the limitations section under its ``h2`` heading and wrap the outcome."""
        found, section_text = walk_to_next_heading(card, "h2", "Bias, Risks, and Limitations")
        return GeneralLimitationsResult(status=found, limitations=section_text)
183
+
184
+
185
class ComputationalRequirementsResult(ComplianceResult):
    """Result of the computational-requirements check.

    Attributes:
        requirements: Content found for the compute section, or ``None``.
    """

    name = "Computational Requirements"

    def __init__(
        self,
        requirements: Optional[str] = None,
        *args,
        **kwargs,
    ):
        """Store ``requirements``; remaining arguments go to the base class."""
        super().__init__(*args, **kwargs)
        self.requirements = requirements

    def __eq__(self, other):
        """Equal when ``other`` is the same kind of result with equal status and requirements.

        Always returns a definite answer: the previous nested if/else left a
        path that fell through and returned ``None``, and its ``assert``-based
        comparison is stripped under ``python -O``.
        """
        if not isinstance(other, ComputationalRequirementsResult):
            return NotImplemented
        # The base class compares ``status``; this adds the subclass field.
        return super().__eq__(other) is True and self.requirements == other.requirements

    def to_string(self):
        """Return the stored requirements text unchanged (``None`` when absent)."""
        return self.requirements
210
 
211
 
212
class ComputationalRequirementsCheck(ComplianceCheck):
    """Check that the card has a "Compute infrastructure" section."""

    name = "Computational Requirements"

    def run_check(self, card: BeautifulSoup):
        """Look up the compute section under its ``h3`` heading and wrap the outcome."""
        found, section_text = walk_to_next_heading(card, "h3", "Compute infrastructure")
        return ComputationalRequirementsResult(status=found, requirements=section_text)
222
 
223
 
224
  class ComplianceSuite:
225
  def __init__(self, checks):
226
  self.checks = checks
227
 
228
+ def run(self, model_card) -> List[ComplianceResult]:
229
  model_card_html = markdown.markdown(model_card)
230
  card_soup = BeautifulSoup(model_card_html, features="html.parser")
231
 
tests/test_compliance_checks.py CHANGED
@@ -5,10 +5,10 @@ import markdown
5
  from bs4 import BeautifulSoup
6
  from compliance_checks import (
7
  ComplianceSuite,
8
- ModelProviderIdentityCheck,
9
- IntendedPurposeCheck,
10
- GeneralLimitationsCheck,
11
- ComputationalRequirementsCheck,
12
  )
13
 
14
 
@@ -201,26 +201,39 @@ Etc..
201
  Etc..
202
  """
203
 
204
- @pytest.mark.parametrize("check, card,check_passed,values", [
205
- (ModelProviderIdentityCheck(), "provider_identity_model_card", True, "Nima Boscarino"),
206
- (ModelProviderIdentityCheck(), "bad_provider_identity_model_card", False, None),
207
- (IntendedPurposeCheck(), "intended_purpose_model_card", True, ["Here is some info about direct uses...", None, "Here is some info about out-of-scope uses..."]),
208
- (IntendedPurposeCheck(), "bad_intended_purpose_model_card", False, [None, None, None]),
209
- (GeneralLimitationsCheck(), "general_limitations_model_card", True, "Hello world! These are some risks..."),
210
- (GeneralLimitationsCheck(), "bad_general_limitations_model_card", False, None),
211
- (ComputationalRequirementsCheck(), "computational_requirements_model_card", True, expected_infrastructure),
212
- (ComputationalRequirementsCheck(), "bad_computational_requirements_model_card", False, None),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  ])
214
- def test_run_model_provider_identity_check(self, check, card, check_passed, values, request):
215
  card = request.getfixturevalue(card)
216
 
217
  model_card_html = markdown.markdown(card)
218
  card_soup = BeautifulSoup(model_card_html, features="html.parser")
219
 
220
- results_check_passed, results_values = check.run_check(card_soup)
221
 
222
- assert results_check_passed == check_passed
223
- assert results_values == values
224
 
225
 
226
  class TestComplianceSuite:
@@ -333,4 +346,4 @@ Jean Zay Public Supercomputer, provided by the French government.
333
 
334
  results = suite.run(card)
335
 
336
- assert all([r[0] for r in results])
 
5
  from bs4 import BeautifulSoup
6
  from compliance_checks import (
7
  ComplianceSuite,
8
+ ModelProviderIdentityCheck, ModelProviderIdentityResult,
9
+ IntendedPurposeCheck, IntendedPurposeResult,
10
+ GeneralLimitationsCheck, GeneralLimitationsResult,
11
+ ComputationalRequirementsCheck, ComputationalRequirementsResult,
12
  )
13
 
14
 
 
201
  Etc..
202
  """
203
 
204
+ @pytest.mark.parametrize("check,card,expected", [
205
+ (ModelProviderIdentityCheck(), "provider_identity_model_card", ModelProviderIdentityResult(
206
+ status=True,
207
+ provider="Nima Boscarino",
208
+ )),
209
+ (ModelProviderIdentityCheck(), "bad_provider_identity_model_card", ModelProviderIdentityResult()),
210
+ (IntendedPurposeCheck(), "intended_purpose_model_card", IntendedPurposeResult(
211
+ status=True,
212
+ direct_use="Here is some info about direct uses...",
213
+ downstream_use=None,
214
+ out_of_scope_use="Here is some info about out-of-scope uses...",
215
+ )),
216
+ (IntendedPurposeCheck(), "bad_intended_purpose_model_card", IntendedPurposeResult()),
217
+ (GeneralLimitationsCheck(), "general_limitations_model_card", GeneralLimitationsResult(
218
+ status=True,
219
+ limitations="Hello world! These are some risks..."
220
+ )),
221
+ (GeneralLimitationsCheck(), "bad_general_limitations_model_card", GeneralLimitationsResult()),
222
+ (ComputationalRequirementsCheck(), "computational_requirements_model_card", ComputationalRequirementsResult(
223
+ status=True,
224
+ requirements=expected_infrastructure,
225
+ )),
226
+ (ComputationalRequirementsCheck(), "bad_computational_requirements_model_card", ComputationalRequirementsResult()),
227
  ])
228
+ def test_run_checks(self, check, card, expected, request):
229
  card = request.getfixturevalue(card)
230
 
231
  model_card_html = markdown.markdown(card)
232
  card_soup = BeautifulSoup(model_card_html, features="html.parser")
233
 
234
+ results = check.run_check(card_soup)
235
 
236
+ assert results == expected
 
237
 
238
 
239
  class TestComplianceSuite:
 
346
 
347
  results = suite.run(card)
348
 
349
+ assert all([r.status for r in results])