| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826 |
- import os
- import re
- from pathlib import Path
- from typing import Any, Literal
- import yaml
- from huggingface_hub.file_download import hf_hub_download
- from huggingface_hub.hf_api import upload_file
- from huggingface_hub.repocard_data import (
- CardData,
- DatasetCardData,
- EvalResult,
- ModelCardData,
- SpaceCardData,
- eval_results_to_model_index,
- model_index_to_eval_results,
- )
- from huggingface_hub.utils import HfHubHTTPError, get_session, hf_raise_for_status, is_jinja_available, yaml_dump
- from . import constants
- from .errors import EntryNotFoundError
- from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args
# Module-level logger, obtained through the package's own logging helper.
logger = logging.get_logger(__name__)

# Default Jinja templates shipped next to this module, under "templates/".
_TEMPLATES_DIR = Path(__file__).parent / "templates"
TEMPLATE_MODELCARD_PATH = _TEMPLATES_DIR / "modelcard_template.md"
TEMPLATE_DATASETCARD_PATH = _TEMPLATES_DIR / "datasetcard_template.md"

# exact same regex as in the Hub server. Please keep in sync.
# See https://github.com/huggingface/moon-landing/blob/main/server/lib/ViewMarkdown.ts#L18
# Group 1: opening "---" line, group 2: YAML payload, group 3: closing "---" line.
REGEX_YAML_BLOCK = re.compile(r"^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))")
class RepoCard:
    """A repo card: a YAML metadata block followed by a Markdown body.

    The parsed metadata is exposed as `.data` (an instance of `card_data_class`) and the
    Markdown body as `.text`. Reading `.content` re-serializes both into a single string.
    """

    # Class used to wrap the parsed YAML metadata.
    card_data_class = CardData
    # Template used by `from_template` when neither a path nor a string is given.
    default_template_path = TEMPLATE_MODELCARD_PATH
    # Default repo type used by `load`, `validate` and `push_to_hub`.
    repo_type = "model"

    def __init__(self, content: str, ignore_metadata_errors: bool = False):
        """Initialize a RepoCard from string content. The content should be a
        Markdown file with a YAML block at the beginning and a Markdown body.

        Args:
            content (`str`): The content of the Markdown file.
            ignore_metadata_errors (`bool`, *optional*, defaults to `False`):
                Forwarded to `card_data_class` when building the `.data` attribute.

        Example:
            ```python
            >>> from huggingface_hub.repocard import RepoCard
            >>> text = '''
            ... ---
            ... language: en
            ... license: mit
            ... ---
            ...
            ... # My repo
            ... '''
            >>> card = RepoCard(text)
            >>> card.data.to_dict()
            {'language': 'en', 'license': 'mit'}
            >>> card.text
            '\\n# My repo\\n'

            ```
        > [!TIP]
        > Raises the following error:
        >
        >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
        >       when the content of the repo card metadata is not a dictionary.
        """
        # Set the content of the RepoCard, as well as underlying .data and .text attributes.
        # See the `content` property setter for more details.
        # `ignore_metadata_errors` must be set first: the setter reads it.
        self.ignore_metadata_errors = ignore_metadata_errors
        self.content = content

    @property
    def content(self):
        """The content of the RepoCard, including the YAML block and the Markdown body."""
        # Reuse the line ending of the content that was originally set (fallback: "\n").
        line_break = _detect_line_ending(self._content) or "\n"
        return f"---{line_break}{self.data.to_yaml(line_break=line_break, original_order=self._original_order)}{line_break}---{line_break}{self.text}"

    @content.setter
    def content(self, content: str):
        """Set the content of the RepoCard.

        Parses the leading YAML block (if any) into `.data` and stores the remainder in `.text`.

        Raises:
            ValueError: if the YAML block does not parse to a dictionary.
        """
        self._content = content

        match = REGEX_YAML_BLOCK.search(content)
        if match:
            # Metadata found in the YAML block
            yaml_block = match.group(2)
            self.text = content[match.end() :]
            data_dict = yaml.safe_load(yaml_block)

            # An empty YAML block parses to None
            if data_dict is None:
                data_dict = {}

            # The YAML block's data should be a dictionary
            if not isinstance(data_dict, dict):
                raise ValueError("repo card metadata block should be a dict")
        else:
            # Model card without metadata... create empty metadata
            logger.warning("Repo card metadata block was not found. Setting CardData to empty.")
            data_dict = {}
            self.text = content

        self.data = self.card_data_class(**data_dict, ignore_metadata_errors=self.ignore_metadata_errors)
        # Remember the key order of the source so `content` can pass it back to `to_yaml`.
        self._original_order = list(data_dict.keys())

    def __str__(self):
        return self.content

    def save(self, filepath: Path | str):
        r"""Save a RepoCard to a file.

        Parent directories are created if needed.

        Args:
            filepath (`Union[Path, str]`): Filepath to the markdown file to save.

        Example:
            ```python
            >>> from huggingface_hub.repocard import RepoCard
            >>> card = RepoCard("---\nlanguage: en\n---\n# This is a test repo card")
            >>> card.save("/tmp/test.md")

            ```
        """
        filepath = Path(filepath)
        filepath.parent.mkdir(parents=True, exist_ok=True)
        # Preserve newlines as in the existing file.
        with open(filepath, mode="w", newline="", encoding="utf-8") as f:
            f.write(str(self))

    @classmethod
    def load(
        cls,
        repo_id_or_path: str | Path,
        repo_type: str | None = None,
        token: str | None = None,
        ignore_metadata_errors: bool = False,
    ):
        """Initialize a RepoCard from a Hugging Face Hub repo's README.md or a local filepath.

        Args:
            repo_id_or_path (`Union[str, Path]`):
                The repo ID associated with a Hugging Face Hub repo or a local filepath.
            repo_type (`str`, *optional*):
                The type of Hugging Face repo to push to. Defaults to None, which will use "model". Other options
                are "dataset" and "space". Not used when loading from a local filepath. If this is called from a child
                class, the default value will be the child class's `repo_type`.
            token (`str`, *optional*):
                Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to the stored token.
            ignore_metadata_errors (`bool`, *optional*, defaults to `False`):
                If True, errors while parsing the metadata section will be ignored. Some information might be lost during
                the process. Use it at your own risk.

        Returns:
            [`huggingface_hub.repocard.RepoCard`]: The RepoCard (or subclass) initialized from the repo's
                README.md file or filepath.

        Raises:
            ValueError: if `repo_id_or_path` is a `Path` that does not point to an existing file.

        Example:
            ```python
            >>> from huggingface_hub.repocard import RepoCard
            >>> card = RepoCard.load("nateraw/food")
            >>> assert card.data.tags == ["generated_from_trainer", "image-classification", "pytorch"]

            ```
        """
        if Path(repo_id_or_path).is_file():
            card_path = Path(repo_id_or_path)
        elif isinstance(repo_id_or_path, str):
            # A string that is not a local file is treated as a repo ID.
            card_path = Path(
                hf_hub_download(
                    repo_id_or_path,
                    constants.REPOCARD_NAME,
                    repo_type=repo_type or cls.repo_type,
                    token=token,
                )
            )
        else:
            raise ValueError(f"Cannot load RepoCard: path not found on disk ({repo_id_or_path}).")

        # Preserve newlines in the existing file.
        with card_path.open(mode="r", newline="", encoding="utf-8") as f:
            return cls(f.read(), ignore_metadata_errors=ignore_metadata_errors)

    def validate(self, repo_type: str | None = None):
        """Validates card against Hugging Face Hub's card validation logic.
        Using this function requires access to the internet, so it is only called
        internally by [`huggingface_hub.repocard.RepoCard.push_to_hub`].

        Args:
            repo_type (`str`, *optional*, defaults to "model"):
                The type of Hugging Face repo to push to. Options are "model", "dataset", and "space".
                If this function is called from a child class, the default will be the child class's `repo_type`.

        > [!TIP]
        > Raises the following errors:
        >
        >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
        >       if the card fails validation checks.
        >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
        >       if the request to the Hub API fails for any other reason.
        """
        # If repo type is provided, otherwise, use the repo type of the card.
        repo_type = repo_type or self.repo_type

        body = {
            "repoType": repo_type,
            "content": str(self),
        }
        headers = {"Accept": "text/plain"}

        # The request is issued outside the `try` so that `response` is always bound
        # when the handler below inspects it.
        response = get_session().post("https://huggingface.co/api/validate-yaml", json=body, headers=headers)
        try:
            hf_raise_for_status(response)
        except HfHubHTTPError as exc:
            if response.status_code == 400:
                # The response body carries the validation message; keep the HTTP error as the cause.
                raise ValueError(response.text) from exc
            raise

    def push_to_hub(
        self,
        repo_id: str,
        token: str | None = None,
        repo_type: str | None = None,
        commit_message: str | None = None,
        commit_description: str | None = None,
        revision: str | None = None,
        create_pr: bool | None = None,
        parent_commit: str | None = None,
    ):
        """Push a RepoCard to a Hugging Face Hub repo.

        Args:
            repo_id (`str`):
                The repo ID of the Hugging Face Hub repo to push to. Example: "nateraw/food".
            token (`str`, *optional*):
                Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to
                the stored token.
            repo_type (`str`, *optional*, defaults to "model"):
                The type of Hugging Face repo to push to. Options are "model", "dataset", and "space". If this
                function is called by a child class, it will default to the child class's `repo_type`.
            commit_message (`str`, *optional*):
                The summary / title / first line of the generated commit.
            commit_description (`str`, *optional*)
                The description of the generated commit.
            revision (`str`, *optional*):
                The git revision to commit from. Defaults to the head of the `"main"` branch.
            create_pr (`bool`, *optional*):
                Whether or not to create a Pull Request with this commit. Defaults to `False`.
            parent_commit (`str`, *optional*):
                The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
                If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
                If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
                Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
                especially useful if the repo is updated / committed too concurrently.
        Returns:
            `str`: URL of the commit which updated the card metadata.
        """

        # If repo type is provided, otherwise, use the repo type of the card.
        repo_type = repo_type or self.repo_type

        # Validate card before pushing to hub
        self.validate(repo_type=repo_type)

        with SoftTemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir) / constants.REPOCARD_NAME
            # Go through `save()` so the file is written with newline="": the card's own
            # line endings are uploaded as-is instead of being translated by the platform.
            self.save(tmp_path)
            url = upload_file(
                path_or_fileobj=str(tmp_path),
                path_in_repo=constants.REPOCARD_NAME,
                repo_id=repo_id,
                token=token,
                repo_type=repo_type,
                commit_message=commit_message,
                commit_description=commit_description,
                create_pr=create_pr,
                revision=revision,
                parent_commit=parent_commit,
            )
        return url

    @classmethod
    def from_template(
        cls,
        card_data: CardData,
        template_path: str | None = None,
        template_str: str | None = None,
        **template_kwargs,
    ):
        """Initialize a RepoCard from a template. By default, it uses the default template.

        Templates are Jinja2 templates that can be customized by passing keyword arguments.

        Args:
            card_data (`huggingface_hub.CardData`):
                A huggingface_hub.CardData instance containing the metadata you want to include in the YAML
                header of the repo card on the Hugging Face Hub.
            template_path (`str`, *optional*):
                A path to a markdown file with optional Jinja template variables that can be filled
                in with `template_kwargs`. Defaults to the default template. Takes precedence over
                `template_str` when both are given.
            template_str (`str`, *optional*):
                The template content itself, used when `template_path` is not given.
            template_kwargs:
                Extra variables passed to the template. They take priority over the entries of
                `card_data.to_dict()`.

        Returns:
            [`huggingface_hub.repocard.RepoCard`]: A RepoCard instance with the specified card data and content from the
            template.

        Raises:
            ImportError: if Jinja2 is not installed.
        """
        if is_jinja_available():
            import jinja2
        else:
            raise ImportError(
                "Using RepoCard.from_template requires Jinja2 to be installed. Please"
                " install it with `pip install Jinja2`."
            )

        kwargs = card_data.to_dict().copy()
        kwargs.update(template_kwargs)  # Template_kwargs have priority

        # Templates are read as UTF-8 explicitly rather than with the platform-default encoding.
        if template_path is not None:
            template_str = Path(template_path).read_text(encoding="utf-8")
        if template_str is None:
            template_str = Path(cls.default_template_path).read_text(encoding="utf-8")
        template = jinja2.Template(template_str)
        content = template.render(card_data=card_data.to_yaml(), **kwargs)
        return cls(content)
class ModelCard(RepoCard):
    """Repo card for a model repo; its metadata is wrapped in `ModelCardData`."""

    repo_type = "model"
    default_template_path = TEMPLATE_MODELCARD_PATH
    card_data_class = ModelCardData  # type: ignore[assignment]

    @classmethod
    def from_template(  # type: ignore # violates Liskov property but easier to use
        cls,
        card_data: ModelCardData,
        template_path: str | None = None,
        template_str: str | None = None,
        **template_kwargs,
    ):
        """Build a ModelCard by rendering a Jinja2 template.

        When no template is given, the default model card template is used:
        https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md

        Extra keyword arguments are made available to the template as variables.

        Args:
            card_data (`huggingface_hub.ModelCardData`):
                The metadata to include in the YAML header of the model card on the Hugging Face Hub.
            template_path (`str`, *optional*):
                Path to a markdown file, optionally containing Jinja template variables to be
                filled in from `template_kwargs`. Defaults to the default template.
            template_str (`str`, *optional*):
                Template content, forwarded unchanged to the parent implementation.

        Returns:
            [`huggingface_hub.ModelCard`]: A ModelCard instance with the specified card data and content from the
            template.

        Example:
            ```python
            >>> from huggingface_hub import ModelCard, ModelCardData, EvalResult

            >>> # Using the Default Template
            >>> card_data = ModelCardData(
            ...     language='en',
            ...     license='mit',
            ...     library_name='timm',
            ...     tags=['image-classification', 'resnet'],
            ...     datasets=['beans'],
            ...     metrics=['accuracy'],
            ... )
            >>> card = ModelCard.from_template(
            ...     card_data,
            ...     model_description='This model does x + y...'
            ... )

            >>> # Including Evaluation Results
            >>> card_data = ModelCardData(
            ...     language='en',
            ...     tags=['image-classification', 'resnet'],
            ...     eval_results=[
            ...         EvalResult(
            ...             task_type='image-classification',
            ...             dataset_type='beans',
            ...             dataset_name='Beans',
            ...             metric_type='accuracy',
            ...             metric_value=0.9,
            ...         ),
            ...     ],
            ...     model_name='my-cool-model',
            ... )
            >>> card = ModelCard.from_template(card_data)

            >>> # Using a Custom Template
            >>> card_data = ModelCardData(
            ...     language='en',
            ...     tags=['image-classification', 'resnet']
            ... )
            >>> card = ModelCard.from_template(
            ...     card_data=card_data,
            ...     template_path='./src/huggingface_hub/templates/modelcard_template.md',
            ...     custom_template_var='custom value',  # will be replaced in template if it exists
            ... )

            ```
        """
        # Pure pass-through: the override only narrows the `card_data` type and adds docs.
        return super().from_template(
            card_data,
            template_path=template_path,
            template_str=template_str,
            **template_kwargs,
        )
class DatasetCard(RepoCard):
    """Repo card for a dataset repo; its metadata is wrapped in `DatasetCardData`."""

    repo_type = "dataset"
    default_template_path = TEMPLATE_DATASETCARD_PATH
    card_data_class = DatasetCardData  # type: ignore[assignment]

    @classmethod
    def from_template(  # type: ignore # violates Liskov property but easier to use
        cls,
        card_data: DatasetCardData,
        template_path: str | None = None,
        template_str: str | None = None,
        **template_kwargs,
    ):
        """Build a DatasetCard by rendering a Jinja2 template.

        When no template is given, the default dataset card template is used:
        https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/datasetcard_template.md

        Extra keyword arguments are made available to the template as variables.

        Args:
            card_data (`huggingface_hub.DatasetCardData`):
                The metadata to include in the YAML header of the dataset card on the Hugging Face Hub.
            template_path (`str`, *optional*):
                Path to a markdown file, optionally containing Jinja template variables to be
                filled in from `template_kwargs`. Defaults to the default template.
            template_str (`str`, *optional*):
                Template content, forwarded unchanged to the parent implementation.

        Returns:
            [`huggingface_hub.DatasetCard`]: A DatasetCard instance with the specified card data and content from the
            template.

        Example:
            ```python
            >>> from huggingface_hub import DatasetCard, DatasetCardData

            >>> # Using the Default Template
            >>> card_data = DatasetCardData(
            ...     language='en',
            ...     license='mit',
            ...     annotations_creators='crowdsourced',
            ...     task_categories=['text-classification'],
            ...     task_ids=['sentiment-classification', 'text-scoring'],
            ...     multilinguality='monolingual',
            ...     pretty_name='My Text Classification Dataset',
            ... )
            >>> card = DatasetCard.from_template(
            ...     card_data,
            ...     pretty_name=card_data.pretty_name,
            ... )

            >>> # Using a Custom Template
            >>> card_data = DatasetCardData(
            ...     language='en',
            ...     license='mit',
            ... )
            >>> card = DatasetCard.from_template(
            ...     card_data=card_data,
            ...     template_path='./src/huggingface_hub/templates/datasetcard_template.md',
            ...     custom_template_var='custom value',  # will be replaced in template if it exists
            ... )

            ```
        """
        # Pure pass-through: the override only narrows the `card_data` type and adds docs.
        return super().from_template(
            card_data,
            template_path=template_path,
            template_str=template_str,
            **template_kwargs,
        )
class SpaceCard(RepoCard):
    """Repo card for a Space repo; its metadata is wrapped in `SpaceCardData`."""

    repo_type = "space"
    # No Space-specific template is defined in this module: the model card template is reused.
    default_template_path = TEMPLATE_MODELCARD_PATH
    card_data_class = SpaceCardData  # type: ignore[assignment]
- def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]: # noqa: F722
- """Detect the line ending of a string. Used by RepoCard to avoid making huge diff on newlines.
- Uses same implementation as in Hub server, keep it in sync.
- Returns:
- str: The detected line ending of the string.
- """
- cr = content.count("\r")
- lf = content.count("\n")
- crlf = content.count("\r\n")
- if cr + lf == 0:
- return None
- if crlf == cr and crlf == lf:
- return "\r\n"
- if cr > lf:
- return "\r"
- else:
- return "\n"
def metadata_load(local_path: str | Path) -> dict | None:
    """Read the YAML metadata block at the top of a local file.

    Args:
        local_path (`Union[str, Path]`): Path to the file to read (typically a README.md).

    Returns:
        `dict` or `None`: The parsed metadata. `None` when the file has no YAML block or
        when the block is empty.

    Raises:
        ValueError: if the YAML block parses to something other than a dictionary.
    """
    # Read as UTF-8 explicitly, matching the encoding `metadata_save` writes with,
    # instead of relying on the platform-default encoding.
    content = Path(local_path).read_text(encoding="utf-8")
    match = REGEX_YAML_BLOCK.search(content)
    if match is None:
        return None

    data = yaml.safe_load(match.group(2))
    if data is None or isinstance(data, dict):
        return data
    raise ValueError("repo card metadata block should be a dict")
def metadata_save(local_path: str | Path, data: dict) -> None:
    r"""Write `data` as the YAML metadata block at the top of a file.

    If the file exists, its YAML block is replaced (or one is prepended when it has none)
    and the rest of the content is kept. If the file does not exist, it is created with
    only the metadata block.

    The line ending already used by the file is reused for the new block, falling back to
    "\n". Docs about open() with newline="" parameter:
    https://docs.python.org/3/library/functions.html?highlight=open#open

    NOTE(review): the original docstring states this does not work with "^M" line breaks,
    which are replaced by "\n" - not verified here.

    Args:
        local_path (`Union[str, Path]`): Path of the file to update or create.
        data (`dict`): Metadata to serialize. Key order is kept.
    """
    line_break = "\n"
    content = ""
    # try to detect existing newline character
    if os.path.exists(local_path):
        with open(local_path, newline="", encoding="utf8") as readme:
            content = readme.read()
            # `newlines` is a tuple when several kinds of line endings were seen: use the first.
            if isinstance(readme.newlines, tuple):
                line_break = readme.newlines[0]
            elif isinstance(readme.newlines, str):
                line_break = readme.newlines

    # Build the full output before opening the file for writing: opening with "w"
    # truncates it, so a serialization error must not be able to wipe the existing content.
    # sort_keys: keep dict order
    data_yaml = yaml_dump(data, sort_keys=False, line_break=line_break)
    match = REGEX_YAML_BLOCK.search(content)
    if match:
        output = content[: match.start()] + f"---{line_break}{data_yaml}---{line_break}" + content[match.end() :]
    else:
        output = f"---{line_break}{data_yaml}---{line_break}{content}"

    # creates a new file if it does not exist yet
    with open(local_path, "w", newline="", encoding="utf8") as readme:
        readme.write(output)
def metadata_eval_result(
    *,
    model_pretty_name: str,
    task_pretty_name: str,
    task_id: str,
    metrics_pretty_name: str,
    metrics_id: str,
    metrics_value: Any,
    dataset_pretty_name: str,
    dataset_id: str,
    metrics_config: str | None = None,
    metrics_verified: bool = False,
    dataset_config: str | None = None,
    dataset_split: str | None = None,
    dataset_revision: str | None = None,
    metrics_verification_token: str | None = None,
) -> dict:
    """
    Build a `model-index` metadata dict describing one evaluation of a model on a dataset.

    Args:
        model_pretty_name (`str`):
            The name of the model in natural language.
        task_pretty_name (`str`):
            The name of a task in natural language.
        task_id (`str`):
            Example: automatic-speech-recognition. A task id.
        metrics_pretty_name (`str`):
            A name for the metric in natural language. Example: Test WER.
        metrics_id (`str`):
            Example: wer. A metric id from https://hf.co/metrics.
        metrics_value (`Any`):
            The value from the metric. Example: 20.0 or "20.0 ± 1.2".
        dataset_pretty_name (`str`):
            The name of the dataset in natural language.
        dataset_id (`str`):
            Example: common_voice. A dataset id from https://hf.co/datasets.
        metrics_config (`str`, *optional*):
            The name of the metric configuration used in `load_metric()`.
            Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
        metrics_verified (`bool`, *optional*, defaults to `False`):
            Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
        dataset_config (`str`, *optional*):
            Example: fr. The name of the dataset configuration used in `load_dataset()`.
        dataset_split (`str`, *optional*):
            Example: test. The name of the dataset split used in `load_dataset()`.
        dataset_revision (`str`, *optional*):
            Example: 5503434ddd753f426f4b38109466949a1217c2bb. The name of the dataset revision
            used in `load_dataset()`.
        metrics_verification_token (`str`, *optional*):
            A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.

    Returns:
        `dict`: a metadata dict with the result from a model evaluated on a dataset.

    Example:
        ```python
        >>> from huggingface_hub import metadata_eval_result
        >>> results = metadata_eval_result(
        ...         model_pretty_name="RoBERTa fine-tuned on ReactionGIF",
        ...         task_pretty_name="Text Classification",
        ...         task_id="text-classification",
        ...         metrics_pretty_name="Accuracy",
        ...         metrics_id="accuracy",
        ...         metrics_value=0.2662102282047272,
        ...         dataset_pretty_name="ReactionJPEG",
        ...         dataset_id="julien-c/reactionjpeg",
        ...         dataset_config="default",
        ...         dataset_split="test",
        ... )
        >>> results == {
        ...     'model-index': [
        ...         {
        ...             'name': 'RoBERTa fine-tuned on ReactionGIF',
        ...             'results': [
        ...                 {
        ...                     'task': {
        ...                         'type': 'text-classification',
        ...                         'name': 'Text Classification'
        ...                     },
        ...                     'dataset': {
        ...                         'name': 'ReactionJPEG',
        ...                         'type': 'julien-c/reactionjpeg',
        ...                         'config': 'default',
        ...                         'split': 'test'
        ...                     },
        ...                     'metrics': [
        ...                         {
        ...                             'type': 'accuracy',
        ...                             'value': 0.2662102282047272,
        ...                             'name': 'Accuracy',
        ...                             'verified': False
        ...                         }
        ...                     ]
        ...                 }
        ...             ]
        ...         }
        ...     ]
        ... }
        True

        ```
    """
    # Map this function's "pretty name / id" vocabulary onto EvalResult's field names.
    single_result = EvalResult(
        task_type=task_id,
        task_name=task_pretty_name,
        dataset_type=dataset_id,
        dataset_name=dataset_pretty_name,
        dataset_config=dataset_config,
        dataset_split=dataset_split,
        dataset_revision=dataset_revision,
        metric_type=metrics_id,
        metric_name=metrics_pretty_name,
        metric_value=metrics_value,
        metric_config=metrics_config,
        verified=metrics_verified,
        verify_token=metrics_verification_token,
    )
    model_index = eval_results_to_model_index(
        model_name=model_pretty_name,
        eval_results=[single_result],
    )
    return {"model-index": model_index}
@validate_hf_hub_args
def metadata_update(
    repo_id: str,
    metadata: dict,
    *,
    repo_type: str | None = None,
    overwrite: bool = False,
    token: str | None = None,
    commit_message: str | None = None,
    commit_description: str | None = None,
    revision: str | None = None,
    create_pr: bool = False,
    parent_commit: str | None = None,
) -> str:
    """
    Updates the metadata in the README.md of a repository on the Hugging Face Hub.
    If the README.md file doesn't exist yet, a new one is created with metadata and
    the default ModelCard or DatasetCard template. For `space` repo, an error is thrown
    as a Space cannot exist without a `README.md` file.

    The `metadata` argument is not modified.

    Args:
        repo_id (`str`):
            The name of the repository.
        metadata (`dict`):
            A dictionary containing the metadata to be updated.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if updating to a dataset or space,
            `None` or `"model"` if updating to a model. Default is `None`.
        overwrite (`bool`, *optional*, defaults to `False`):
            If set to `True` an existing field can be overwritten, otherwise
            attempting to overwrite an existing field will cause an error.
        token (`str`, *optional*):
            The Hugging Face authentication token.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to
            `f"Update metadata with huggingface_hub"`
        commit_description (`str` *optional*)
            The description of the generated commit
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the
            `"main"` branch.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request from `revision` with that commit.
            Defaults to `False`.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
            If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
            If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
            Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
            especially useful if the repo is updated / committed too concurrently.
    Returns:
        `str`: URL of the commit which updated the card metadata.

    Raises:
        ValueError: if `repo_type` is unknown, if the target is a Space without a `README.md`,
            or if an existing field / metric would get a different value while `overwrite=False`.

    Example:
        ```python
        >>> from huggingface_hub import metadata_update
        >>> metadata = {'model-index': [{'name': 'RoBERTa fine-tuned on ReactionGIF',
        ...             'results': [{'dataset': {'name': 'ReactionGIF',
        ...                                      'type': 'julien-c/reactiongif'},
        ...                           'metrics': [{'name': 'Recall',
        ...                                        'type': 'recall',
        ...                                        'value': 0.7762102282047272}],
        ...                          'task': {'name': 'Text Classification',
        ...                                   'type': 'text-classification'}}]}]}
        >>> url = metadata_update("hf-internal-testing/reactiongif-roberta-card", metadata)

        ```
    """
    commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"

    # Card class given repo_type
    card_class: type[RepoCard]
    if repo_type is None or repo_type == "model":
        card_class = ModelCard
    elif repo_type == "dataset":
        card_class = DatasetCard
    elif repo_type == "space":
        card_class = RepoCard
    else:
        raise ValueError(f"Unknown repo_type: {repo_type}")

    # Either load repo_card from the Hub or create an empty one.
    # NOTE: Will not create the repo if it doesn't exist.
    try:
        card = card_class.load(repo_id, token=token, repo_type=repo_type)
    except EntryNotFoundError:
        if repo_type == "space":
            raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.")

        # Initialize a ModelCard or DatasetCard from default template and no data.
        # Cast to the concrete expected card type to satisfy type checkers.
        card = card_class.from_template(CardData())  # type: ignore

    for key, value in metadata.items():
        if key == "model-index":
            # if the new metadata doesn't include a name, either use existing one or repo name
            if "name" not in value[0]:
                # The model name lives on the card's data, not on the card itself.
                # A missing or empty name falls back to the repo id.
                existing_name = getattr(card.data, "model_name", None)
                # Work on a shallow copy so the caller's `metadata` is left untouched.
                value = [{**value[0], "name": existing_name or repo_id}, *value[1:]]
            model_name, new_results = model_index_to_eval_results(value)
            if card.data.eval_results is None:
                card.data.eval_results = new_results
                card.data.model_name = model_name
            else:
                existing_results = card.data.eval_results

                # Iterate over new results
                #   Iterate over existing results
                #       If both results describe the same metric but value is different:
                #           If overwrite=True: overwrite the metric value
                #           Else: raise ValueError
                #       Else: append new result to existing ones.
                for new_result in new_results:
                    result_found = False
                    for existing_result in existing_results:
                        if new_result.is_equal_except_value(existing_result):
                            if new_result != existing_result and not overwrite:
                                raise ValueError(
                                    "You passed a new value for the existing metric"
                                    f" 'name: {new_result.metric_name}, type: "
                                    f"{new_result.metric_type}'. Set `overwrite=True`"
                                    " to overwrite existing metrics."
                                )
                            result_found = True
                            existing_result.metric_value = new_result.metric_value
                            # The verification token is only refreshed on results flagged as verified.
                            if existing_result.verified is True:
                                existing_result.verify_token = new_result.verify_token
                    if not result_found:
                        card.data.eval_results.append(new_result)
        else:
            # Any metadata that is not a result metric
            if card.data.get(key) is not None and not overwrite and card.data.get(key) != value:
                raise ValueError(
                    f"You passed a new value for the existing meta data field '{key}'."
                    " Set `overwrite=True` to overwrite existing metadata."
                )
            else:
                card.data[key] = value

    return card.push_to_hub(
        repo_id,
        token=token,
        repo_type=repo_type,
        commit_message=commit_message,
        commit_description=commit_description,
        create_pr=create_pr,
        revision=revision,
        parent_commit=parent_commit,
    )
|