Spaces:
Runtime error
Runtime error
import os | |
import re | |
from pathlib import Path | |
from typing import Any, Dict, Literal, Optional, Type, Union | |
import requests | |
import yaml | |
from huggingface_hub.file_download import hf_hub_download | |
from huggingface_hub.hf_api import upload_file | |
from huggingface_hub.repocard_data import ( | |
CardData, | |
DatasetCardData, | |
EvalResult, | |
ModelCardData, | |
SpaceCardData, | |
eval_results_to_model_index, | |
model_index_to_eval_results, | |
) | |
from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump | |
from . import constants | |
from .errors import EntryNotFoundError | |
from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args | |
logger = logging.get_logger(__name__) | |
TEMPLATE_MODELCARD_PATH = Path(__file__).parent / "templates" / "modelcard_template.md" | |
TEMPLATE_DATASETCARD_PATH = Path(__file__).parent / "templates" / "datasetcard_template.md" | |
# exact same regex as in the Hub server. Please keep in sync. | |
# See https://github.com/huggingface/moon-landing/blob/main/server/lib/ViewMarkdown.ts#L18 | |
REGEX_YAML_BLOCK = re.compile(r"^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))") | |
class RepoCard: | |
card_data_class = CardData | |
default_template_path = TEMPLATE_MODELCARD_PATH | |
repo_type = "model" | |
def __init__(self, content: str, ignore_metadata_errors: bool = False): | |
"""Initialize a RepoCard from string content. The content should be a | |
Markdown file with a YAML block at the beginning and a Markdown body. | |
Args: | |
content (`str`): The content of the Markdown file. | |
Example: | |
```python | |
>>> from huggingface_hub.repocard import RepoCard | |
>>> text = ''' | |
... --- | |
... language: en | |
... license: mit | |
... --- | |
... | |
... # My repo | |
... ''' | |
>>> card = RepoCard(text) | |
>>> card.data.to_dict() | |
{'language': 'en', 'license': 'mit'} | |
>>> card.text | |
'\\n# My repo\\n' | |
``` | |
<Tip> | |
Raises the following error: | |
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) | |
when the content of the repo card metadata is not a dictionary. | |
</Tip> | |
""" | |
# Set the content of the RepoCard, as well as underlying .data and .text attributes. | |
# See the `content` property setter for more details. | |
self.ignore_metadata_errors = ignore_metadata_errors | |
self.content = content | |
def content(self): | |
"""The content of the RepoCard, including the YAML block and the Markdown body.""" | |
line_break = _detect_line_ending(self._content) or "\n" | |
return f"---{line_break}{self.data.to_yaml(line_break=line_break, original_order=self._original_order)}{line_break}---{line_break}{self.text}" | |
def content(self, content: str): | |
"""Set the content of the RepoCard.""" | |
self._content = content | |
match = REGEX_YAML_BLOCK.search(content) | |
if match: | |
# Metadata found in the YAML block | |
yaml_block = match.group(2) | |
self.text = content[match.end() :] | |
data_dict = yaml.safe_load(yaml_block) | |
if data_dict is None: | |
data_dict = {} | |
# The YAML block's data should be a dictionary | |
if not isinstance(data_dict, dict): | |
raise ValueError("repo card metadata block should be a dict") | |
else: | |
# Model card without metadata... create empty metadata | |
logger.warning("Repo card metadata block was not found. Setting CardData to empty.") | |
data_dict = {} | |
self.text = content | |
self.data = self.card_data_class(**data_dict, ignore_metadata_errors=self.ignore_metadata_errors) | |
self._original_order = list(data_dict.keys()) | |
def __str__(self): | |
return self.content | |
def save(self, filepath: Union[Path, str]): | |
r"""Save a RepoCard to a file. | |
Args: | |
filepath (`Union[Path, str]`): Filepath to the markdown file to save. | |
Example: | |
```python | |
>>> from huggingface_hub.repocard import RepoCard | |
>>> card = RepoCard("---\nlanguage: en\n---\n# This is a test repo card") | |
>>> card.save("/tmp/test.md") | |
``` | |
""" | |
filepath = Path(filepath) | |
filepath.parent.mkdir(parents=True, exist_ok=True) | |
# Preserve newlines as in the existing file. | |
with open(filepath, mode="w", newline="", encoding="utf-8") as f: | |
f.write(str(self)) | |
def load( | |
cls, | |
repo_id_or_path: Union[str, Path], | |
repo_type: Optional[str] = None, | |
token: Optional[str] = None, | |
ignore_metadata_errors: bool = False, | |
): | |
"""Initialize a RepoCard from a Hugging Face Hub repo's README.md or a local filepath. | |
Args: | |
repo_id_or_path (`Union[str, Path]`): | |
The repo ID associated with a Hugging Face Hub repo or a local filepath. | |
repo_type (`str`, *optional*): | |
The type of Hugging Face repo to push to. Defaults to None, which will use use "model". Other options | |
are "dataset" and "space". Not used when loading from a local filepath. If this is called from a child | |
class, the default value will be the child class's `repo_type`. | |
token (`str`, *optional*): | |
Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to the stored token. | |
ignore_metadata_errors (`str`): | |
If True, errors while parsing the metadata section will be ignored. Some information might be lost during | |
the process. Use it at your own risk. | |
Returns: | |
[`huggingface_hub.repocard.RepoCard`]: The RepoCard (or subclass) initialized from the repo's | |
README.md file or filepath. | |
Example: | |
```python | |
>>> from huggingface_hub.repocard import RepoCard | |
>>> card = RepoCard.load("nateraw/food") | |
>>> assert card.data.tags == ["generated_from_trainer", "image-classification", "pytorch"] | |
``` | |
""" | |
if Path(repo_id_or_path).exists(): | |
card_path = Path(repo_id_or_path) | |
elif isinstance(repo_id_or_path, str): | |
card_path = Path( | |
hf_hub_download( | |
repo_id_or_path, | |
constants.REPOCARD_NAME, | |
repo_type=repo_type or cls.repo_type, | |
token=token, | |
) | |
) | |
else: | |
raise ValueError(f"Cannot load RepoCard: path not found on disk ({repo_id_or_path}).") | |
# Preserve newlines in the existing file. | |
with card_path.open(mode="r", newline="", encoding="utf-8") as f: | |
return cls(f.read(), ignore_metadata_errors=ignore_metadata_errors) | |
def validate(self, repo_type: Optional[str] = None): | |
"""Validates card against Hugging Face Hub's card validation logic. | |
Using this function requires access to the internet, so it is only called | |
internally by [`huggingface_hub.repocard.RepoCard.push_to_hub`]. | |
Args: | |
repo_type (`str`, *optional*, defaults to "model"): | |
The type of Hugging Face repo to push to. Options are "model", "dataset", and "space". | |
If this function is called from a child class, the default will be the child class's `repo_type`. | |
<Tip> | |
Raises the following errors: | |
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) | |
if the card fails validation checks. | |
- [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) | |
if the request to the Hub API fails for any other reason. | |
</Tip> | |
""" | |
# If repo type is provided, otherwise, use the repo type of the card. | |
repo_type = repo_type or self.repo_type | |
body = { | |
"repoType": repo_type, | |
"content": str(self), | |
} | |
headers = {"Accept": "text/plain"} | |
try: | |
r = get_session().post("https://huggingface.co/api/validate-yaml", body, headers=headers) | |
r.raise_for_status() | |
except requests.exceptions.HTTPError as exc: | |
if r.status_code == 400: | |
raise ValueError(r.text) | |
else: | |
raise exc | |
def push_to_hub( | |
self, | |
repo_id: str, | |
token: Optional[str] = None, | |
repo_type: Optional[str] = None, | |
commit_message: Optional[str] = None, | |
commit_description: Optional[str] = None, | |
revision: Optional[str] = None, | |
create_pr: Optional[bool] = None, | |
parent_commit: Optional[str] = None, | |
): | |
"""Push a RepoCard to a Hugging Face Hub repo. | |
Args: | |
repo_id (`str`): | |
The repo ID of the Hugging Face Hub repo to push to. Example: "nateraw/food". | |
token (`str`, *optional*): | |
Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to | |
the stored token. | |
repo_type (`str`, *optional*, defaults to "model"): | |
The type of Hugging Face repo to push to. Options are "model", "dataset", and "space". If this | |
function is called by a child class, it will default to the child class's `repo_type`. | |
commit_message (`str`, *optional*): | |
The summary / title / first line of the generated commit. | |
commit_description (`str`, *optional*) | |
The description of the generated commit. | |
revision (`str`, *optional*): | |
The git revision to commit from. Defaults to the head of the `"main"` branch. | |
create_pr (`bool`, *optional*): | |
Whether or not to create a Pull Request with this commit. Defaults to `False`. | |
parent_commit (`str`, *optional*): | |
The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. | |
If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. | |
If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. | |
Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be | |
especially useful if the repo is updated / committed to concurrently. | |
Returns: | |
`str`: URL of the commit which updated the card metadata. | |
""" | |
# If repo type is provided, otherwise, use the repo type of the card. | |
repo_type = repo_type or self.repo_type | |
# Validate card before pushing to hub | |
self.validate(repo_type=repo_type) | |
with SoftTemporaryDirectory() as tmpdir: | |
tmp_path = Path(tmpdir) / constants.REPOCARD_NAME | |
tmp_path.write_text(str(self)) | |
url = upload_file( | |
path_or_fileobj=str(tmp_path), | |
path_in_repo=constants.REPOCARD_NAME, | |
repo_id=repo_id, | |
token=token, | |
repo_type=repo_type, | |
commit_message=commit_message, | |
commit_description=commit_description, | |
create_pr=create_pr, | |
revision=revision, | |
parent_commit=parent_commit, | |
) | |
return url | |
def from_template( | |
cls, | |
card_data: CardData, | |
template_path: Optional[str] = None, | |
template_str: Optional[str] = None, | |
**template_kwargs, | |
): | |
"""Initialize a RepoCard from a template. By default, it uses the default template. | |
Templates are Jinja2 templates that can be customized by passing keyword arguments. | |
Args: | |
card_data (`huggingface_hub.CardData`): | |
A huggingface_hub.CardData instance containing the metadata you want to include in the YAML | |
header of the repo card on the Hugging Face Hub. | |
template_path (`str`, *optional*): | |
A path to a markdown file with optional Jinja template variables that can be filled | |
in with `template_kwargs`. Defaults to the default template. | |
Returns: | |
[`huggingface_hub.repocard.RepoCard`]: A RepoCard instance with the specified card data and content from the | |
template. | |
""" | |
if is_jinja_available(): | |
import jinja2 | |
else: | |
raise ImportError( | |
"Using RepoCard.from_template requires Jinja2 to be installed. Please" | |
" install it with `pip install Jinja2`." | |
) | |
kwargs = card_data.to_dict().copy() | |
kwargs.update(template_kwargs) # Template_kwargs have priority | |
if template_path is not None: | |
template_str = Path(template_path).read_text() | |
if template_str is None: | |
template_str = Path(cls.default_template_path).read_text() | |
template = jinja2.Template(template_str) | |
content = template.render(card_data=card_data.to_yaml(), **kwargs) | |
return cls(content) | |
class ModelCard(RepoCard): | |
card_data_class = ModelCardData | |
default_template_path = TEMPLATE_MODELCARD_PATH | |
repo_type = "model" | |
def from_template( # type: ignore # violates Liskov property but easier to use | |
cls, | |
card_data: ModelCardData, | |
template_path: Optional[str] = None, | |
template_str: Optional[str] = None, | |
**template_kwargs, | |
): | |
"""Initialize a ModelCard from a template. By default, it uses the default template, which can be found here: | |
https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md | |
Templates are Jinja2 templates that can be customized by passing keyword arguments. | |
Args: | |
card_data (`huggingface_hub.ModelCardData`): | |
A huggingface_hub.ModelCardData instance containing the metadata you want to include in the YAML | |
header of the model card on the Hugging Face Hub. | |
template_path (`str`, *optional*): | |
A path to a markdown file with optional Jinja template variables that can be filled | |
in with `template_kwargs`. Defaults to the default template. | |
Returns: | |
[`huggingface_hub.ModelCard`]: A ModelCard instance with the specified card data and content from the | |
template. | |
Example: | |
```python | |
>>> from huggingface_hub import ModelCard, ModelCardData, EvalResult | |
>>> # Using the Default Template | |
>>> card_data = ModelCardData( | |
... language='en', | |
... license='mit', | |
... library_name='timm', | |
... tags=['image-classification', 'resnet'], | |
... datasets=['beans'], | |
... metrics=['accuracy'], | |
... ) | |
>>> card = ModelCard.from_template( | |
... card_data, | |
... model_description='This model does x + y...' | |
... ) | |
>>> # Including Evaluation Results | |
>>> card_data = ModelCardData( | |
... language='en', | |
... tags=['image-classification', 'resnet'], | |
... eval_results=[ | |
... EvalResult( | |
... task_type='image-classification', | |
... dataset_type='beans', | |
... dataset_name='Beans', | |
... metric_type='accuracy', | |
... metric_value=0.9, | |
... ), | |
... ], | |
... model_name='my-cool-model', | |
... ) | |
>>> card = ModelCard.from_template(card_data) | |
>>> # Using a Custom Template | |
>>> card_data = ModelCardData( | |
... language='en', | |
... tags=['image-classification', 'resnet'] | |
... ) | |
>>> card = ModelCard.from_template( | |
... card_data=card_data, | |
... template_path='./src/huggingface_hub/templates/modelcard_template.md', | |
... custom_template_var='custom value', # will be replaced in template if it exists | |
... ) | |
``` | |
""" | |
return super().from_template(card_data, template_path, template_str, **template_kwargs) | |
class DatasetCard(RepoCard): | |
card_data_class = DatasetCardData | |
default_template_path = TEMPLATE_DATASETCARD_PATH | |
repo_type = "dataset" | |
def from_template( # type: ignore # violates Liskov property but easier to use | |
cls, | |
card_data: DatasetCardData, | |
template_path: Optional[str] = None, | |
template_str: Optional[str] = None, | |
**template_kwargs, | |
): | |
"""Initialize a DatasetCard from a template. By default, it uses the default template, which can be found here: | |
https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/datasetcard_template.md | |
Templates are Jinja2 templates that can be customized by passing keyword arguments. | |
Args: | |
card_data (`huggingface_hub.DatasetCardData`): | |
A huggingface_hub.DatasetCardData instance containing the metadata you want to include in the YAML | |
header of the dataset card on the Hugging Face Hub. | |
template_path (`str`, *optional*): | |
A path to a markdown file with optional Jinja template variables that can be filled | |
in with `template_kwargs`. Defaults to the default template. | |
Returns: | |
[`huggingface_hub.DatasetCard`]: A DatasetCard instance with the specified card data and content from the | |
template. | |
Example: | |
```python | |
>>> from huggingface_hub import DatasetCard, DatasetCardData | |
>>> # Using the Default Template | |
>>> card_data = DatasetCardData( | |
... language='en', | |
... license='mit', | |
... annotations_creators='crowdsourced', | |
... task_categories=['text-classification'], | |
... task_ids=['sentiment-classification', 'text-scoring'], | |
... multilinguality='monolingual', | |
... pretty_name='My Text Classification Dataset', | |
... ) | |
>>> card = DatasetCard.from_template( | |
... card_data, | |
... pretty_name=card_data.pretty_name, | |
... ) | |
>>> # Using a Custom Template | |
>>> card_data = DatasetCardData( | |
... language='en', | |
... license='mit', | |
... ) | |
>>> card = DatasetCard.from_template( | |
... card_data=card_data, | |
... template_path='./src/huggingface_hub/templates/datasetcard_template.md', | |
... custom_template_var='custom value', # will be replaced in template if it exists | |
... ) | |
``` | |
""" | |
return super().from_template(card_data, template_path, template_str, **template_kwargs) | |
class SpaceCard(RepoCard): | |
card_data_class = SpaceCardData | |
default_template_path = TEMPLATE_MODELCARD_PATH | |
repo_type = "space" | |
def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]: # noqa: F722 | |
"""Detect the line ending of a string. Used by RepoCard to avoid making huge diff on newlines. | |
Uses same implementation as in Hub server, keep it in sync. | |
Returns: | |
str: The detected line ending of the string. | |
""" | |
cr = content.count("\r") | |
lf = content.count("\n") | |
crlf = content.count("\r\n") | |
if cr + lf == 0: | |
return None | |
if crlf == cr and crlf == lf: | |
return "\r\n" | |
if cr > lf: | |
return "\r" | |
else: | |
return "\n" | |
def metadata_load(local_path: Union[str, Path]) -> Optional[Dict]: | |
content = Path(local_path).read_text() | |
match = REGEX_YAML_BLOCK.search(content) | |
if match: | |
yaml_block = match.group(2) | |
data = yaml.safe_load(yaml_block) | |
if data is None or isinstance(data, dict): | |
return data | |
raise ValueError("repo card metadata block should be a dict") | |
else: | |
return None | |
def metadata_save(local_path: Union[str, Path], data: Dict) -> None: | |
""" | |
Save the metadata dict in the upper YAML part Trying to preserve newlines as | |
in the existing file. Docs about open() with newline="" parameter: | |
https://docs.python.org/3/library/functions.html?highlight=open#open Does | |
not work with "^M" linebreaks, which are replaced by \n | |
""" | |
line_break = "\n" | |
content = "" | |
# try to detect existing newline character | |
if os.path.exists(local_path): | |
with open(local_path, "r", newline="", encoding="utf8") as readme: | |
content = readme.read() | |
if isinstance(readme.newlines, tuple): | |
line_break = readme.newlines[0] | |
elif isinstance(readme.newlines, str): | |
line_break = readme.newlines | |
# creates a new file if it not | |
with open(local_path, "w", newline="", encoding="utf8") as readme: | |
data_yaml = yaml_dump(data, sort_keys=False, line_break=line_break) | |
# sort_keys: keep dict order | |
match = REGEX_YAML_BLOCK.search(content) | |
if match: | |
output = content[: match.start()] + f"---{line_break}{data_yaml}---{line_break}" + content[match.end() :] | |
else: | |
output = f"---{line_break}{data_yaml}---{line_break}{content}" | |
readme.write(output) | |
readme.close() | |
def metadata_eval_result( | |
*, | |
model_pretty_name: str, | |
task_pretty_name: str, | |
task_id: str, | |
metrics_pretty_name: str, | |
metrics_id: str, | |
metrics_value: Any, | |
dataset_pretty_name: str, | |
dataset_id: str, | |
metrics_config: Optional[str] = None, | |
metrics_verified: bool = False, | |
dataset_config: Optional[str] = None, | |
dataset_split: Optional[str] = None, | |
dataset_revision: Optional[str] = None, | |
metrics_verification_token: Optional[str] = None, | |
) -> Dict: | |
""" | |
Creates a metadata dict with the result from a model evaluated on a dataset. | |
Args: | |
model_pretty_name (`str`): | |
The name of the model in natural language. | |
task_pretty_name (`str`): | |
The name of a task in natural language. | |
task_id (`str`): | |
Example: automatic-speech-recognition. A task id. | |
metrics_pretty_name (`str`): | |
A name for the metric in natural language. Example: Test WER. | |
metrics_id (`str`): | |
Example: wer. A metric id from https://hf.co/metrics. | |
metrics_value (`Any`): | |
The value from the metric. Example: 20.0 or "20.0 ± 1.2". | |
dataset_pretty_name (`str`): | |
The name of the dataset in natural language. | |
dataset_id (`str`): | |
Example: common_voice. A dataset id from https://hf.co/datasets. | |
metrics_config (`str`, *optional*): | |
The name of the metric configuration used in `load_metric()`. | |
Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`. | |
metrics_verified (`bool`, *optional*, defaults to `False`): | |
Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set. | |
dataset_config (`str`, *optional*): | |
Example: fr. The name of the dataset configuration used in `load_dataset()`. | |
dataset_split (`str`, *optional*): | |
Example: test. The name of the dataset split used in `load_dataset()`. | |
dataset_revision (`str`, *optional*): | |
Example: 5503434ddd753f426f4b38109466949a1217c2bb. The name of the dataset dataset revision | |
used in `load_dataset()`. | |
metrics_verification_token (`bool`, *optional*): | |
A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. | |
Returns: | |
`dict`: a metadata dict with the result from a model evaluated on a dataset. | |
Example: | |
```python | |
>>> from huggingface_hub import metadata_eval_result | |
>>> results = metadata_eval_result( | |
... model_pretty_name="RoBERTa fine-tuned on ReactionGIF", | |
... task_pretty_name="Text Classification", | |
... task_id="text-classification", | |
... metrics_pretty_name="Accuracy", | |
... metrics_id="accuracy", | |
... metrics_value=0.2662102282047272, | |
... dataset_pretty_name="ReactionJPEG", | |
... dataset_id="julien-c/reactionjpeg", | |
... dataset_config="default", | |
... dataset_split="test", | |
... ) | |
>>> results == { | |
... 'model-index': [ | |
... { | |
... 'name': 'RoBERTa fine-tuned on ReactionGIF', | |
... 'results': [ | |
... { | |
... 'task': { | |
... 'type': 'text-classification', | |
... 'name': 'Text Classification' | |
... }, | |
... 'dataset': { | |
... 'name': 'ReactionJPEG', | |
... 'type': 'julien-c/reactionjpeg', | |
... 'config': 'default', | |
... 'split': 'test' | |
... }, | |
... 'metrics': [ | |
... { | |
... 'type': 'accuracy', | |
... 'value': 0.2662102282047272, | |
... 'name': 'Accuracy', | |
... 'verified': False | |
... } | |
... ] | |
... } | |
... ] | |
... } | |
... ] | |
... } | |
True | |
``` | |
""" | |
return { | |
"model-index": eval_results_to_model_index( | |
model_name=model_pretty_name, | |
eval_results=[ | |
EvalResult( | |
task_name=task_pretty_name, | |
task_type=task_id, | |
metric_name=metrics_pretty_name, | |
metric_type=metrics_id, | |
metric_value=metrics_value, | |
dataset_name=dataset_pretty_name, | |
dataset_type=dataset_id, | |
metric_config=metrics_config, | |
verified=metrics_verified, | |
verify_token=metrics_verification_token, | |
dataset_config=dataset_config, | |
dataset_split=dataset_split, | |
dataset_revision=dataset_revision, | |
) | |
], | |
) | |
} | |
def metadata_update( | |
repo_id: str, | |
metadata: Dict, | |
*, | |
repo_type: Optional[str] = None, | |
overwrite: bool = False, | |
token: Optional[str] = None, | |
commit_message: Optional[str] = None, | |
commit_description: Optional[str] = None, | |
revision: Optional[str] = None, | |
create_pr: bool = False, | |
parent_commit: Optional[str] = None, | |
) -> str: | |
""" | |
Updates the metadata in the README.md of a repository on the Hugging Face Hub. | |
If the README.md file doesn't exist yet, a new one is created with metadata and an | |
the default ModelCard or DatasetCard template. For `space` repo, an error is thrown | |
as a Space cannot exist without a `README.md` file. | |
Args: | |
repo_id (`str`): | |
The name of the repository. | |
metadata (`dict`): | |
A dictionary containing the metadata to be updated. | |
repo_type (`str`, *optional*): | |
Set to `"dataset"` or `"space"` if updating to a dataset or space, | |
`None` or `"model"` if updating to a model. Default is `None`. | |
overwrite (`bool`, *optional*, defaults to `False`): | |
If set to `True` an existing field can be overwritten, otherwise | |
attempting to overwrite an existing field will cause an error. | |
token (`str`, *optional*): | |
The Hugging Face authentication token. | |
commit_message (`str`, *optional*): | |
The summary / title / first line of the generated commit. Defaults to | |
`f"Update metadata with huggingface_hub"` | |
commit_description (`str` *optional*) | |
The description of the generated commit | |
revision (`str`, *optional*): | |
The git revision to commit from. Defaults to the head of the | |
`"main"` branch. | |
create_pr (`boolean`, *optional*): | |
Whether or not to create a Pull Request from `revision` with that commit. | |
Defaults to `False`. | |
parent_commit (`str`, *optional*): | |
The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. | |
If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. | |
If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. | |
Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be | |
especially useful if the repo is updated / committed to concurrently. | |
Returns: | |
`str`: URL of the commit which updated the card metadata. | |
Example: | |
```python | |
>>> from huggingface_hub import metadata_update | |
>>> metadata = {'model-index': [{'name': 'RoBERTa fine-tuned on ReactionGIF', | |
... 'results': [{'dataset': {'name': 'ReactionGIF', | |
... 'type': 'julien-c/reactiongif'}, | |
... 'metrics': [{'name': 'Recall', | |
... 'type': 'recall', | |
... 'value': 0.7762102282047272}], | |
... 'task': {'name': 'Text Classification', | |
... 'type': 'text-classification'}}]}]} | |
>>> url = metadata_update("hf-internal-testing/reactiongif-roberta-card", metadata) | |
``` | |
""" | |
commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub" | |
# Card class given repo_type | |
card_class: Type[RepoCard] | |
if repo_type is None or repo_type == "model": | |
card_class = ModelCard | |
elif repo_type == "dataset": | |
card_class = DatasetCard | |
elif repo_type == "space": | |
card_class = RepoCard | |
else: | |
raise ValueError(f"Unknown repo_type: {repo_type}") | |
# Either load repo_card from the Hub or create an empty one. | |
# NOTE: Will not create the repo if it doesn't exist. | |
try: | |
card = card_class.load(repo_id, token=token, repo_type=repo_type) | |
except EntryNotFoundError: | |
if repo_type == "space": | |
raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.") | |
# Initialize a ModelCard or DatasetCard from default template and no data. | |
card = card_class.from_template(CardData()) | |
for key, value in metadata.items(): | |
if key == "model-index": | |
# if the new metadata doesn't include a name, either use existing one or repo name | |
if "name" not in value[0]: | |
value[0]["name"] = getattr(card, "model_name", repo_id) | |
model_name, new_results = model_index_to_eval_results(value) | |
if card.data.eval_results is None: | |
card.data.eval_results = new_results | |
card.data.model_name = model_name | |
else: | |
existing_results = card.data.eval_results | |
# Iterate over new results | |
# Iterate over existing results | |
# If both results describe the same metric but value is different: | |
# If overwrite=True: overwrite the metric value | |
# Else: raise ValueError | |
# Else: append new result to existing ones. | |
for new_result in new_results: | |
result_found = False | |
for existing_result in existing_results: | |
if new_result.is_equal_except_value(existing_result): | |
if new_result != existing_result and not overwrite: | |
raise ValueError( | |
"You passed a new value for the existing metric" | |
f" 'name: {new_result.metric_name}, type: " | |
f"{new_result.metric_type}'. Set `overwrite=True`" | |
" to overwrite existing metrics." | |
) | |
result_found = True | |
existing_result.metric_value = new_result.metric_value | |
if existing_result.verified is True: | |
existing_result.verify_token = new_result.verify_token | |
if not result_found: | |
card.data.eval_results.append(new_result) | |
else: | |
# Any metadata that is not a result metric | |
if card.data.get(key) is not None and not overwrite and card.data.get(key) != value: | |
raise ValueError( | |
f"You passed a new value for the existing meta data field '{key}'." | |
" Set `overwrite=True` to overwrite existing metadata." | |
) | |
else: | |
card.data[key] = value | |
return card.push_to_hub( | |
repo_id, | |
token=token, | |
repo_type=repo_type, | |
commit_message=commit_message, | |
commit_description=commit_description, | |
create_pr=create_pr, | |
revision=revision, | |
parent_commit=parent_commit, | |
) | |