repocard.py 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826
  1. import os
  2. import re
  3. from pathlib import Path
  4. from typing import Any, Literal
  5. import yaml
  6. from huggingface_hub.file_download import hf_hub_download
  7. from huggingface_hub.hf_api import upload_file
  8. from huggingface_hub.repocard_data import (
  9. CardData,
  10. DatasetCardData,
  11. EvalResult,
  12. ModelCardData,
  13. SpaceCardData,
  14. eval_results_to_model_index,
  15. model_index_to_eval_results,
  16. )
  17. from huggingface_hub.utils import HfHubHTTPError, get_session, hf_raise_for_status, is_jinja_available, yaml_dump
  18. from . import constants
  19. from .errors import EntryNotFoundError
  20. from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args
  21. logger = logging.get_logger(__name__)
  22. TEMPLATE_MODELCARD_PATH = Path(__file__).parent / "templates" / "modelcard_template.md"
  23. TEMPLATE_DATASETCARD_PATH = Path(__file__).parent / "templates" / "datasetcard_template.md"
  24. # exact same regex as in the Hub server. Please keep in sync.
  25. # See https://github.com/huggingface/moon-landing/blob/main/server/lib/ViewMarkdown.ts#L18
  26. REGEX_YAML_BLOCK = re.compile(r"^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))")
  27. class RepoCard:
  28. card_data_class = CardData
  29. default_template_path = TEMPLATE_MODELCARD_PATH
  30. repo_type = "model"
  31. def __init__(self, content: str, ignore_metadata_errors: bool = False):
  32. """Initialize a RepoCard from string content. The content should be a
  33. Markdown file with a YAML block at the beginning and a Markdown body.
  34. Args:
  35. content (`str`): The content of the Markdown file.
  36. Example:
  37. ```python
  38. >>> from huggingface_hub.repocard import RepoCard
  39. >>> text = '''
  40. ... ---
  41. ... language: en
  42. ... license: mit
  43. ... ---
  44. ...
  45. ... # My repo
  46. ... '''
  47. >>> card = RepoCard(text)
  48. >>> card.data.to_dict()
  49. {'language': 'en', 'license': 'mit'}
  50. >>> card.text
  51. '\\n# My repo\\n'
  52. ```
  53. > [!TIP]
  54. > Raises the following error:
  55. >
  56. > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
  57. > when the content of the repo card metadata is not a dictionary.
  58. """
  59. # Set the content of the RepoCard, as well as underlying .data and .text attributes.
  60. # See the `content` property setter for more details.
  61. self.ignore_metadata_errors = ignore_metadata_errors
  62. self.content = content
  63. @property
  64. def content(self):
  65. """The content of the RepoCard, including the YAML block and the Markdown body."""
  66. line_break = _detect_line_ending(self._content) or "\n"
  67. return f"---{line_break}{self.data.to_yaml(line_break=line_break, original_order=self._original_order)}{line_break}---{line_break}{self.text}"
  68. @content.setter
  69. def content(self, content: str):
  70. """Set the content of the RepoCard."""
  71. self._content = content
  72. match = REGEX_YAML_BLOCK.search(content)
  73. if match:
  74. # Metadata found in the YAML block
  75. yaml_block = match.group(2)
  76. self.text = content[match.end() :]
  77. data_dict = yaml.safe_load(yaml_block)
  78. if data_dict is None:
  79. data_dict = {}
  80. # The YAML block's data should be a dictionary
  81. if not isinstance(data_dict, dict):
  82. raise ValueError("repo card metadata block should be a dict")
  83. else:
  84. # Model card without metadata... create empty metadata
  85. logger.warning("Repo card metadata block was not found. Setting CardData to empty.")
  86. data_dict = {}
  87. self.text = content
  88. self.data = self.card_data_class(**data_dict, ignore_metadata_errors=self.ignore_metadata_errors)
  89. self._original_order = list(data_dict.keys())
  90. def __str__(self):
  91. return self.content
  92. def save(self, filepath: Path | str):
  93. r"""Save a RepoCard to a file.
  94. Args:
  95. filepath (`Union[Path, str]`): Filepath to the markdown file to save.
  96. Example:
  97. ```python
  98. >>> from huggingface_hub.repocard import RepoCard
  99. >>> card = RepoCard("---\nlanguage: en\n---\n# This is a test repo card")
  100. >>> card.save("/tmp/test.md")
  101. ```
  102. """
  103. filepath = Path(filepath)
  104. filepath.parent.mkdir(parents=True, exist_ok=True)
  105. # Preserve newlines as in the existing file.
  106. with open(filepath, mode="w", newline="", encoding="utf-8") as f:
  107. f.write(str(self))
  108. @classmethod
  109. def load(
  110. cls,
  111. repo_id_or_path: str | Path,
  112. repo_type: str | None = None,
  113. token: str | None = None,
  114. ignore_metadata_errors: bool = False,
  115. ):
  116. """Initialize a RepoCard from a Hugging Face Hub repo's README.md or a local filepath.
  117. Args:
  118. repo_id_or_path (`Union[str, Path]`):
  119. The repo ID associated with a Hugging Face Hub repo or a local filepath.
  120. repo_type (`str`, *optional*):
  121. The type of Hugging Face repo to push to. Defaults to None, which will use "model". Other options
  122. are "dataset" and "space". Not used when loading from a local filepath. If this is called from a child
  123. class, the default value will be the child class's `repo_type`.
  124. token (`str`, *optional*):
  125. Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to the stored token.
  126. ignore_metadata_errors (`str`):
  127. If True, errors while parsing the metadata section will be ignored. Some information might be lost during
  128. the process. Use it at your own risk.
  129. Returns:
  130. [`huggingface_hub.repocard.RepoCard`]: The RepoCard (or subclass) initialized from the repo's
  131. README.md file or filepath.
  132. Example:
  133. ```python
  134. >>> from huggingface_hub.repocard import RepoCard
  135. >>> card = RepoCard.load("nateraw/food")
  136. >>> assert card.data.tags == ["generated_from_trainer", "image-classification", "pytorch"]
  137. ```
  138. """
  139. if Path(repo_id_or_path).is_file():
  140. card_path = Path(repo_id_or_path)
  141. elif isinstance(repo_id_or_path, str):
  142. card_path = Path(
  143. hf_hub_download(
  144. repo_id_or_path,
  145. constants.REPOCARD_NAME,
  146. repo_type=repo_type or cls.repo_type,
  147. token=token,
  148. )
  149. )
  150. else:
  151. raise ValueError(f"Cannot load RepoCard: path not found on disk ({repo_id_or_path}).")
  152. # Preserve newlines in the existing file.
  153. with card_path.open(mode="r", newline="", encoding="utf-8") as f:
  154. return cls(f.read(), ignore_metadata_errors=ignore_metadata_errors)
  155. def validate(self, repo_type: str | None = None):
  156. """Validates card against Hugging Face Hub's card validation logic.
  157. Using this function requires access to the internet, so it is only called
  158. internally by [`huggingface_hub.repocard.RepoCard.push_to_hub`].
  159. Args:
  160. repo_type (`str`, *optional*, defaults to "model"):
  161. The type of Hugging Face repo to push to. Options are "model", "dataset", and "space".
  162. If this function is called from a child class, the default will be the child class's `repo_type`.
  163. > [!TIP]
  164. > Raises the following errors:
  165. >
  166. > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
  167. > if the card fails validation checks.
  168. > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
  169. > if the request to the Hub API fails for any other reason.
  170. """
  171. # If repo type is provided, otherwise, use the repo type of the card.
  172. repo_type = repo_type or self.repo_type
  173. body = {
  174. "repoType": repo_type,
  175. "content": str(self),
  176. }
  177. headers = {"Accept": "text/plain"}
  178. try:
  179. response = get_session().post("https://huggingface.co/api/validate-yaml", json=body, headers=headers)
  180. hf_raise_for_status(response)
  181. except HfHubHTTPError as exc:
  182. if response.status_code == 400:
  183. raise ValueError(response.text)
  184. else:
  185. raise exc
  186. def push_to_hub(
  187. self,
  188. repo_id: str,
  189. token: str | None = None,
  190. repo_type: str | None = None,
  191. commit_message: str | None = None,
  192. commit_description: str | None = None,
  193. revision: str | None = None,
  194. create_pr: bool | None = None,
  195. parent_commit: str | None = None,
  196. ):
  197. """Push a RepoCard to a Hugging Face Hub repo.
  198. Args:
  199. repo_id (`str`):
  200. The repo ID of the Hugging Face Hub repo to push to. Example: "nateraw/food".
  201. token (`str`, *optional*):
  202. Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to
  203. the stored token.
  204. repo_type (`str`, *optional*, defaults to "model"):
  205. The type of Hugging Face repo to push to. Options are "model", "dataset", and "space". If this
  206. function is called by a child class, it will default to the child class's `repo_type`.
  207. commit_message (`str`, *optional*):
  208. The summary / title / first line of the generated commit.
  209. commit_description (`str`, *optional*)
  210. The description of the generated commit.
  211. revision (`str`, *optional*):
  212. The git revision to commit from. Defaults to the head of the `"main"` branch.
  213. create_pr (`bool`, *optional*):
  214. Whether or not to create a Pull Request with this commit. Defaults to `False`.
  215. parent_commit (`str`, *optional*):
  216. The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
  217. If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
  218. If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
  219. Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
  220. especially useful if the repo is updated / committed too concurrently.
  221. Returns:
  222. `str`: URL of the commit which updated the card metadata.
  223. """
  224. # If repo type is provided, otherwise, use the repo type of the card.
  225. repo_type = repo_type or self.repo_type
  226. # Validate card before pushing to hub
  227. self.validate(repo_type=repo_type)
  228. with SoftTemporaryDirectory() as tmpdir:
  229. tmp_path = Path(tmpdir) / constants.REPOCARD_NAME
  230. tmp_path.write_text(str(self), encoding="utf-8")
  231. url = upload_file(
  232. path_or_fileobj=str(tmp_path),
  233. path_in_repo=constants.REPOCARD_NAME,
  234. repo_id=repo_id,
  235. token=token,
  236. repo_type=repo_type,
  237. commit_message=commit_message,
  238. commit_description=commit_description,
  239. create_pr=create_pr,
  240. revision=revision,
  241. parent_commit=parent_commit,
  242. )
  243. return url
  244. @classmethod
  245. def from_template(
  246. cls,
  247. card_data: CardData,
  248. template_path: str | None = None,
  249. template_str: str | None = None,
  250. **template_kwargs,
  251. ):
  252. """Initialize a RepoCard from a template. By default, it uses the default template.
  253. Templates are Jinja2 templates that can be customized by passing keyword arguments.
  254. Args:
  255. card_data (`huggingface_hub.CardData`):
  256. A huggingface_hub.CardData instance containing the metadata you want to include in the YAML
  257. header of the repo card on the Hugging Face Hub.
  258. template_path (`str`, *optional*):
  259. A path to a markdown file with optional Jinja template variables that can be filled
  260. in with `template_kwargs`. Defaults to the default template.
  261. Returns:
  262. [`huggingface_hub.repocard.RepoCard`]: A RepoCard instance with the specified card data and content from the
  263. template.
  264. """
  265. if is_jinja_available():
  266. import jinja2
  267. else:
  268. raise ImportError(
  269. "Using RepoCard.from_template requires Jinja2 to be installed. Please"
  270. " install it with `pip install Jinja2`."
  271. )
  272. kwargs = card_data.to_dict().copy()
  273. kwargs.update(template_kwargs) # Template_kwargs have priority
  274. if template_path is not None:
  275. template_str = Path(template_path).read_text()
  276. if template_str is None:
  277. template_str = Path(cls.default_template_path).read_text()
  278. template = jinja2.Template(template_str)
  279. content = template.render(card_data=card_data.to_yaml(), **kwargs)
  280. return cls(content)
  281. class ModelCard(RepoCard):
  282. card_data_class = ModelCardData # type: ignore[assignment]
  283. default_template_path = TEMPLATE_MODELCARD_PATH
  284. repo_type = "model"
  285. @classmethod
  286. def from_template( # type: ignore # violates Liskov property but easier to use
  287. cls,
  288. card_data: ModelCardData,
  289. template_path: str | None = None,
  290. template_str: str | None = None,
  291. **template_kwargs,
  292. ):
  293. """Initialize a ModelCard from a template. By default, it uses the default template, which can be found here:
  294. https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md
  295. Templates are Jinja2 templates that can be customized by passing keyword arguments.
  296. Args:
  297. card_data (`huggingface_hub.ModelCardData`):
  298. A huggingface_hub.ModelCardData instance containing the metadata you want to include in the YAML
  299. header of the model card on the Hugging Face Hub.
  300. template_path (`str`, *optional*):
  301. A path to a markdown file with optional Jinja template variables that can be filled
  302. in with `template_kwargs`. Defaults to the default template.
  303. Returns:
  304. [`huggingface_hub.ModelCard`]: A ModelCard instance with the specified card data and content from the
  305. template.
  306. Example:
  307. ```python
  308. >>> from huggingface_hub import ModelCard, ModelCardData, EvalResult
  309. >>> # Using the Default Template
  310. >>> card_data = ModelCardData(
  311. ... language='en',
  312. ... license='mit',
  313. ... library_name='timm',
  314. ... tags=['image-classification', 'resnet'],
  315. ... datasets=['beans'],
  316. ... metrics=['accuracy'],
  317. ... )
  318. >>> card = ModelCard.from_template(
  319. ... card_data,
  320. ... model_description='This model does x + y...'
  321. ... )
  322. >>> # Including Evaluation Results
  323. >>> card_data = ModelCardData(
  324. ... language='en',
  325. ... tags=['image-classification', 'resnet'],
  326. ... eval_results=[
  327. ... EvalResult(
  328. ... task_type='image-classification',
  329. ... dataset_type='beans',
  330. ... dataset_name='Beans',
  331. ... metric_type='accuracy',
  332. ... metric_value=0.9,
  333. ... ),
  334. ... ],
  335. ... model_name='my-cool-model',
  336. ... )
  337. >>> card = ModelCard.from_template(card_data)
  338. >>> # Using a Custom Template
  339. >>> card_data = ModelCardData(
  340. ... language='en',
  341. ... tags=['image-classification', 'resnet']
  342. ... )
  343. >>> card = ModelCard.from_template(
  344. ... card_data=card_data,
  345. ... template_path='./src/huggingface_hub/templates/modelcard_template.md',
  346. ... custom_template_var='custom value', # will be replaced in template if it exists
  347. ... )
  348. ```
  349. """
  350. return super().from_template(card_data, template_path, template_str, **template_kwargs)
  351. class DatasetCard(RepoCard):
  352. card_data_class = DatasetCardData # type: ignore[assignment]
  353. default_template_path = TEMPLATE_DATASETCARD_PATH
  354. repo_type = "dataset"
  355. @classmethod
  356. def from_template( # type: ignore # violates Liskov property but easier to use
  357. cls,
  358. card_data: DatasetCardData,
  359. template_path: str | None = None,
  360. template_str: str | None = None,
  361. **template_kwargs,
  362. ):
  363. """Initialize a DatasetCard from a template. By default, it uses the default template, which can be found here:
  364. https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/datasetcard_template.md
  365. Templates are Jinja2 templates that can be customized by passing keyword arguments.
  366. Args:
  367. card_data (`huggingface_hub.DatasetCardData`):
  368. A huggingface_hub.DatasetCardData instance containing the metadata you want to include in the YAML
  369. header of the dataset card on the Hugging Face Hub.
  370. template_path (`str`, *optional*):
  371. A path to a markdown file with optional Jinja template variables that can be filled
  372. in with `template_kwargs`. Defaults to the default template.
  373. Returns:
  374. [`huggingface_hub.DatasetCard`]: A DatasetCard instance with the specified card data and content from the
  375. template.
  376. Example:
  377. ```python
  378. >>> from huggingface_hub import DatasetCard, DatasetCardData
  379. >>> # Using the Default Template
  380. >>> card_data = DatasetCardData(
  381. ... language='en',
  382. ... license='mit',
  383. ... annotations_creators='crowdsourced',
  384. ... task_categories=['text-classification'],
  385. ... task_ids=['sentiment-classification', 'text-scoring'],
  386. ... multilinguality='monolingual',
  387. ... pretty_name='My Text Classification Dataset',
  388. ... )
  389. >>> card = DatasetCard.from_template(
  390. ... card_data,
  391. ... pretty_name=card_data.pretty_name,
  392. ... )
  393. >>> # Using a Custom Template
  394. >>> card_data = DatasetCardData(
  395. ... language='en',
  396. ... license='mit',
  397. ... )
  398. >>> card = DatasetCard.from_template(
  399. ... card_data=card_data,
  400. ... template_path='./src/huggingface_hub/templates/datasetcard_template.md',
  401. ... custom_template_var='custom value', # will be replaced in template if it exists
  402. ... )
  403. ```
  404. """
  405. return super().from_template(card_data, template_path, template_str, **template_kwargs)
  406. class SpaceCard(RepoCard):
  407. card_data_class = SpaceCardData # type: ignore[assignment]
  408. default_template_path = TEMPLATE_MODELCARD_PATH
  409. repo_type = "space"
  410. def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]: # noqa: F722
  411. """Detect the line ending of a string. Used by RepoCard to avoid making huge diff on newlines.
  412. Uses same implementation as in Hub server, keep it in sync.
  413. Returns:
  414. str: The detected line ending of the string.
  415. """
  416. cr = content.count("\r")
  417. lf = content.count("\n")
  418. crlf = content.count("\r\n")
  419. if cr + lf == 0:
  420. return None
  421. if crlf == cr and crlf == lf:
  422. return "\r\n"
  423. if cr > lf:
  424. return "\r"
  425. else:
  426. return "\n"
  427. def metadata_load(local_path: str | Path) -> dict | None:
  428. content = Path(local_path).read_text()
  429. match = REGEX_YAML_BLOCK.search(content)
  430. if match:
  431. yaml_block = match.group(2)
  432. data = yaml.safe_load(yaml_block)
  433. if data is None or isinstance(data, dict):
  434. return data
  435. raise ValueError("repo card metadata block should be a dict")
  436. else:
  437. return None
  438. def metadata_save(local_path: str | Path, data: dict) -> None:
  439. """
  440. Save the metadata dict in the upper YAML part Trying to preserve newlines as
  441. in the existing file. Docs about open() with newline="" parameter:
  442. https://docs.python.org/3/library/functions.html?highlight=open#open Does
  443. not work with "^M" linebreaks, which are replaced by \n
  444. """
  445. line_break = "\n"
  446. content = ""
  447. # try to detect existing newline character
  448. if os.path.exists(local_path):
  449. with open(local_path, newline="", encoding="utf8") as readme:
  450. content = readme.read()
  451. if isinstance(readme.newlines, tuple):
  452. line_break = readme.newlines[0]
  453. elif isinstance(readme.newlines, str):
  454. line_break = readme.newlines
  455. # creates a new file if it not
  456. with open(local_path, "w", newline="", encoding="utf8") as readme:
  457. data_yaml = yaml_dump(data, sort_keys=False, line_break=line_break)
  458. # sort_keys: keep dict order
  459. match = REGEX_YAML_BLOCK.search(content)
  460. if match:
  461. output = content[: match.start()] + f"---{line_break}{data_yaml}---{line_break}" + content[match.end() :]
  462. else:
  463. output = f"---{line_break}{data_yaml}---{line_break}{content}"
  464. readme.write(output)
  465. readme.close()
  466. def metadata_eval_result(
  467. *,
  468. model_pretty_name: str,
  469. task_pretty_name: str,
  470. task_id: str,
  471. metrics_pretty_name: str,
  472. metrics_id: str,
  473. metrics_value: Any,
  474. dataset_pretty_name: str,
  475. dataset_id: str,
  476. metrics_config: str | None = None,
  477. metrics_verified: bool = False,
  478. dataset_config: str | None = None,
  479. dataset_split: str | None = None,
  480. dataset_revision: str | None = None,
  481. metrics_verification_token: str | None = None,
  482. ) -> dict:
  483. """
  484. Creates a metadata dict with the result from a model evaluated on a dataset.
  485. Args:
  486. model_pretty_name (`str`):
  487. The name of the model in natural language.
  488. task_pretty_name (`str`):
  489. The name of a task in natural language.
  490. task_id (`str`):
  491. Example: automatic-speech-recognition. A task id.
  492. metrics_pretty_name (`str`):
  493. A name for the metric in natural language. Example: Test WER.
  494. metrics_id (`str`):
  495. Example: wer. A metric id from https://hf.co/metrics.
  496. metrics_value (`Any`):
  497. The value from the metric. Example: 20.0 or "20.0 ± 1.2".
  498. dataset_pretty_name (`str`):
  499. The name of the dataset in natural language.
  500. dataset_id (`str`):
  501. Example: common_voice. A dataset id from https://hf.co/datasets.
  502. metrics_config (`str`, *optional*):
  503. The name of the metric configuration used in `load_metric()`.
  504. Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
  505. metrics_verified (`bool`, *optional*, defaults to `False`):
  506. Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
  507. dataset_config (`str`, *optional*):
  508. Example: fr. The name of the dataset configuration used in `load_dataset()`.
  509. dataset_split (`str`, *optional*):
  510. Example: test. The name of the dataset split used in `load_dataset()`.
  511. dataset_revision (`str`, *optional*):
  512. Example: 5503434ddd753f426f4b38109466949a1217c2bb. The name of the dataset dataset revision
  513. used in `load_dataset()`.
  514. metrics_verification_token (`bool`, *optional*):
  515. A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
  516. Returns:
  517. `dict`: a metadata dict with the result from a model evaluated on a dataset.
  518. Example:
  519. ```python
  520. >>> from huggingface_hub import metadata_eval_result
  521. >>> results = metadata_eval_result(
  522. ... model_pretty_name="RoBERTa fine-tuned on ReactionGIF",
  523. ... task_pretty_name="Text Classification",
  524. ... task_id="text-classification",
  525. ... metrics_pretty_name="Accuracy",
  526. ... metrics_id="accuracy",
  527. ... metrics_value=0.2662102282047272,
  528. ... dataset_pretty_name="ReactionJPEG",
  529. ... dataset_id="julien-c/reactionjpeg",
  530. ... dataset_config="default",
  531. ... dataset_split="test",
  532. ... )
  533. >>> results == {
  534. ... 'model-index': [
  535. ... {
  536. ... 'name': 'RoBERTa fine-tuned on ReactionGIF',
  537. ... 'results': [
  538. ... {
  539. ... 'task': {
  540. ... 'type': 'text-classification',
  541. ... 'name': 'Text Classification'
  542. ... },
  543. ... 'dataset': {
  544. ... 'name': 'ReactionJPEG',
  545. ... 'type': 'julien-c/reactionjpeg',
  546. ... 'config': 'default',
  547. ... 'split': 'test'
  548. ... },
  549. ... 'metrics': [
  550. ... {
  551. ... 'type': 'accuracy',
  552. ... 'value': 0.2662102282047272,
  553. ... 'name': 'Accuracy',
  554. ... 'verified': False
  555. ... }
  556. ... ]
  557. ... }
  558. ... ]
  559. ... }
  560. ... ]
  561. ... }
  562. True
  563. ```
  564. """
  565. return {
  566. "model-index": eval_results_to_model_index(
  567. model_name=model_pretty_name,
  568. eval_results=[
  569. EvalResult(
  570. task_name=task_pretty_name,
  571. task_type=task_id,
  572. metric_name=metrics_pretty_name,
  573. metric_type=metrics_id,
  574. metric_value=metrics_value,
  575. dataset_name=dataset_pretty_name,
  576. dataset_type=dataset_id,
  577. metric_config=metrics_config,
  578. verified=metrics_verified,
  579. verify_token=metrics_verification_token,
  580. dataset_config=dataset_config,
  581. dataset_split=dataset_split,
  582. dataset_revision=dataset_revision,
  583. )
  584. ],
  585. )
  586. }
  587. @validate_hf_hub_args
  588. def metadata_update(
  589. repo_id: str,
  590. metadata: dict,
  591. *,
  592. repo_type: str | None = None,
  593. overwrite: bool = False,
  594. token: str | None = None,
  595. commit_message: str | None = None,
  596. commit_description: str | None = None,
  597. revision: str | None = None,
  598. create_pr: bool = False,
  599. parent_commit: str | None = None,
  600. ) -> str:
  601. """
  602. Updates the metadata in the README.md of a repository on the Hugging Face Hub.
  603. If the README.md file doesn't exist yet, a new one is created with metadata and
  604. the default ModelCard or DatasetCard template. For `space` repo, an error is thrown
  605. as a Space cannot exist without a `README.md` file.
  606. Args:
  607. repo_id (`str`):
  608. The name of the repository.
  609. metadata (`dict`):
  610. A dictionary containing the metadata to be updated.
  611. repo_type (`str`, *optional*):
  612. Set to `"dataset"` or `"space"` if updating to a dataset or space,
  613. `None` or `"model"` if updating to a model. Default is `None`.
  614. overwrite (`bool`, *optional*, defaults to `False`):
  615. If set to `True` an existing field can be overwritten, otherwise
  616. attempting to overwrite an existing field will cause an error.
  617. token (`str`, *optional*):
  618. The Hugging Face authentication token.
  619. commit_message (`str`, *optional*):
  620. The summary / title / first line of the generated commit. Defaults to
  621. `f"Update metadata with huggingface_hub"`
  622. commit_description (`str` *optional*)
  623. The description of the generated commit
  624. revision (`str`, *optional*):
  625. The git revision to commit from. Defaults to the head of the
  626. `"main"` branch.
  627. create_pr (`boolean`, *optional*):
  628. Whether or not to create a Pull Request from `revision` with that commit.
  629. Defaults to `False`.
  630. parent_commit (`str`, *optional*):
  631. The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
  632. If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
  633. If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
  634. Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
  635. especially useful if the repo is updated / committed too concurrently.
  636. Returns:
  637. `str`: URL of the commit which updated the card metadata.
  638. Example:
  639. ```python
  640. >>> from huggingface_hub import metadata_update
  641. >>> metadata = {'model-index': [{'name': 'RoBERTa fine-tuned on ReactionGIF',
  642. ... 'results': [{'dataset': {'name': 'ReactionGIF',
  643. ... 'type': 'julien-c/reactiongif'},
  644. ... 'metrics': [{'name': 'Recall',
  645. ... 'type': 'recall',
  646. ... 'value': 0.7762102282047272}],
  647. ... 'task': {'name': 'Text Classification',
  648. ... 'type': 'text-classification'}}]}]}
  649. >>> url = metadata_update("hf-internal-testing/reactiongif-roberta-card", metadata)
  650. ```
  651. """
  652. commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"
  653. # Card class given repo_type
  654. card_class: type[RepoCard]
  655. if repo_type is None or repo_type == "model":
  656. card_class = ModelCard
  657. elif repo_type == "dataset":
  658. card_class = DatasetCard
  659. elif repo_type == "space":
  660. card_class = RepoCard
  661. else:
  662. raise ValueError(f"Unknown repo_type: {repo_type}")
  663. # Either load repo_card from the Hub or create an empty one.
  664. # NOTE: Will not create the repo if it doesn't exist.
  665. try:
  666. card = card_class.load(repo_id, token=token, repo_type=repo_type)
  667. except EntryNotFoundError:
  668. if repo_type == "space":
  669. raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.")
  670. # Initialize a ModelCard or DatasetCard from default template and no data.
  671. # Cast to the concrete expected card type to satisfy type checkers.
  672. card = card_class.from_template(CardData()) # type: ignore
  673. for key, value in metadata.items():
  674. if key == "model-index":
  675. # if the new metadata doesn't include a name, either use existing one or repo name
  676. if "name" not in value[0]:
  677. value[0]["name"] = getattr(card, "model_name", repo_id)
  678. model_name, new_results = model_index_to_eval_results(value)
  679. if card.data.eval_results is None:
  680. card.data.eval_results = new_results
  681. card.data.model_name = model_name
  682. else:
  683. existing_results = card.data.eval_results
  684. # Iterate over new results
  685. # Iterate over existing results
  686. # If both results describe the same metric but value is different:
  687. # If overwrite=True: overwrite the metric value
  688. # Else: raise ValueError
  689. # Else: append new result to existing ones.
  690. for new_result in new_results:
  691. result_found = False
  692. for existing_result in existing_results:
  693. if new_result.is_equal_except_value(existing_result):
  694. if new_result != existing_result and not overwrite:
  695. raise ValueError(
  696. "You passed a new value for the existing metric"
  697. f" 'name: {new_result.metric_name}, type: "
  698. f"{new_result.metric_type}'. Set `overwrite=True`"
  699. " to overwrite existing metrics."
  700. )
  701. result_found = True
  702. existing_result.metric_value = new_result.metric_value
  703. if existing_result.verified is True:
  704. existing_result.verify_token = new_result.verify_token
  705. if not result_found:
  706. card.data.eval_results.append(new_result)
  707. else:
  708. # Any metadata that is not a result metric
  709. if card.data.get(key) is not None and not overwrite and card.data.get(key) != value:
  710. raise ValueError(
  711. f"You passed a new value for the existing meta data field '{key}'."
  712. " Set `overwrite=True` to overwrite existing metadata."
  713. )
  714. else:
  715. card.data[key] = value
  716. return card.push_to_hub(
  717. repo_id,
  718. token=token,
  719. repo_type=repo_type,
  720. commit_message=commit_message,
  721. commit_description=commit_description,
  722. create_pr=create_pr,
  723. revision=revision,
  724. parent_commit=parent_commit,
  725. )