NimaBoscarino's picture
WIP: Compliance Check pipeline w/ gradio app
11bd448
raw
history blame
2.54 kB
import os
from huggingface_hub import (comment_discussion,
create_discussion, get_discussion_details,
get_repo_discussions)
from tabulate import tabulate
from difflib import SequenceMatcher
# Value of the "KEY" environment variable, or None when it is unset.
# NOTE(review): not referenced by any function visible in this part of the
# file -- presumably a credential meant for the Hub API calls; confirm its
# consumer before relying on it.
KEY = os.environ.get("KEY")
def similar(a, b):
    """Return the difflib similarity ratio of two sequences.

    The result is a float in [0, 1]; 1.0 means the sequences are identical
    and 0.0 means they have nothing in common.
    """
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()
def create_metadata_breakdown_table(compliance_check_dictionary):
    """Render compliance check results as a GitHub-flavoured Markdown table.

    Args:
        compliance_check_dictionary: Mapping of compliance check name to its
            result. Each item becomes one table row, in the mapping's
            iteration order.

    Returns:
        The value produced by ``tabulate`` for those rows, using the
        "github" table format and the column headers "Compliance Check"
        and "Present".
    """
    # The mapping's items are already (check, result) pairs, so there is no
    # need to copy the dict or to split and re-zip its keys and values.
    rows = list(compliance_check_dictionary.items())
    return tabulate(
        rows, tablefmt="github", headers=("Compliance Check", "Present")
    )
def create_markdown_report(
    desired_metadata_dictionary, repo_name, update: bool = False
):
    """Build the Markdown text of the compliance report card for a repo.

    Args:
        desired_metadata_dictionary: Compliance check results, rendered as a
            table by ``create_metadata_breakdown_table``.
        repo_name: Repository name interpolated into the introduction.
        update: When true, the heading is suffixed with "(updated)".

    Returns:
        The report as a single Markdown string.
    """
    # Resolve the interpolated pieces first so the template below stays a
    # plain fill-in-the-blanks document.
    heading_suffix = "(updated)" if update else ""
    breakdown_table = create_metadata_breakdown_table(desired_metadata_dictionary)
    return f"""# Model Card Regulatory Compliance report card {heading_suffix}
\n
This is an automatically produced model card regulatory compliance report card for {repo_name}.
This report is meant as a POC!
\n
## Breakdown of metadata fields for your model
\n
{breakdown_table}
\n
"""
def create_or_update_report(compliance_check, repo_name):
    """Publish the compliance report card on a model repo's discussion board.

    If the repo already has an open discussion carrying the report card
    title, the report is posted there as an "(updated)" comment -- but only
    when it differs from the latest comment in that thread. Otherwise a new
    discussion containing the report is opened.

    Args:
        compliance_check: Compliance check results passed through to
            ``create_markdown_report``.
        repo_name: Name of the model repository to report on.

    Returns:
        Always ``True``.
    """
    # One title for both the lookup and the creation. Previously the lookup
    # searched for a different title than the one used when creating the
    # thread, so the existing thread was never found and every run opened a
    # brand-new discussion.
    report_title = "Model Card Regulatory Compliance Report Card"

    report = create_markdown_report(compliance_check, repo_name, update=False)

    for discussion in get_repo_discussions(repo_name, repo_type="model"):
        if discussion.title != report_title or discussion.status != "open":
            continue

        # An existing open report card thread.
        details = get_discussion_details(
            repo_name, discussion.num, repo_type="model"
        )
        last_comment = _last_comment_text(details.events)
        updated_report = create_markdown_report(
            compliance_check, repo_name, update=True
        )

        # The latest comment may be either rendering of the report (the
        # initial one or an "(updated)" one), so compare against both;
        # comparing only the initial rendering would re-post an identical
        # update on every run once the first update has been posted.
        unchanged = last_comment is not None and (
            similar(report, last_comment) > 0.999
            or similar(updated_report, last_comment) > 0.999
        )
        if not unchanged:
            comment_discussion(
                repo_name,
                discussion.num,
                comment=updated_report,
                repo_type="model",
            )
        return True

    create_discussion(
        repo_name,
        report_title,
        description=report,
        repo_type="model",
    )
    return True


def _last_comment_text(events):
    """Return the text of the most recent event that has one, else None."""
    # Not every discussion event is a comment (status or title changes carry
    # no ``content``), so walk backwards instead of assuming events[-1] is one.
    for event in reversed(list(events)):
        content = getattr(event, "content", None)
        if content is not None:
            return content
    return None