# repocard_data.py (33 KB)
  1. import copy
  2. from collections import defaultdict
  3. from dataclasses import dataclass
  4. from typing import Any
  5. from huggingface_hub.utils import logging, yaml_dump
  6. logger = logging.get_logger(__name__)
  7. @dataclass
  8. class EvalResult:
  9. """
  10. Flattened representation of individual evaluation results found in model-index of Model Cards.
  11. For more information on the model-index spec, see https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1.
  12. Args:
  13. task_type (`str`):
  14. The task identifier. Example: "image-classification".
  15. dataset_type (`str`):
  16. The dataset identifier. Example: "common_voice". Use dataset id from https://hf.co/datasets.
  17. dataset_name (`str`):
  18. A pretty name for the dataset. Example: "Common Voice (French)".
  19. metric_type (`str`):
  20. The metric identifier. Example: "wer". Use metric id from https://hf.co/metrics.
  21. metric_value (`Any`):
  22. The metric value. Example: 0.9 or "20.0 ± 1.2".
  23. task_name (`str`, *optional*):
  24. A pretty name for the task. Example: "Speech Recognition".
  25. dataset_config (`str`, *optional*):
  26. The name of the dataset configuration used in `load_dataset()`.
  27. Example: fr in `load_dataset("common_voice", "fr")`. See the `datasets` docs for more info:
  28. https://hf.co/docs/datasets/package_reference/loading_methods#datasets.load_dataset.name
  29. dataset_split (`str`, *optional*):
  30. The split used in `load_dataset()`. Example: "test".
  31. dataset_revision (`str`, *optional*):
  32. The revision (AKA Git Sha) of the dataset used in `load_dataset()`.
  33. Example: 5503434ddd753f426f4b38109466949a1217c2bb
  34. dataset_args (`dict[str, Any]`, *optional*):
  35. The arguments passed during `Metric.compute()`. Example for `bleu`: `{"max_order": 4}`
  36. metric_name (`str`, *optional*):
  37. A pretty name for the metric. Example: "Test WER".
  38. metric_config (`str`, *optional*):
  39. The name of the metric configuration used in `load_metric()`.
  40. Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
  41. See the `datasets` docs for more info: https://huggingface.co/docs/datasets/v2.1.0/en/loading#load-configurations
  42. metric_args (`dict[str, Any]`, *optional*):
  43. The arguments passed during `Metric.compute()`. Example for `bleu`: max_order: 4
  44. verified (`bool`, *optional*):
  45. Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
  46. verify_token (`str`, *optional*):
  47. A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
  48. source_name (`str`, *optional*):
  49. The name of the source of the evaluation result. Example: "Open LLM Leaderboard".
  50. source_url (`str`, *optional*):
  51. The URL of the source of the evaluation result. Example: "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard".
  52. """
  53. # Required
  54. # The task identifier
  55. # Example: automatic-speech-recognition
  56. task_type: str
  57. # The dataset identifier
  58. # Example: common_voice. Use dataset id from https://hf.co/datasets
  59. dataset_type: str
  60. # A pretty name for the dataset.
  61. # Example: Common Voice (French)
  62. dataset_name: str
  63. # The metric identifier
  64. # Example: wer. Use metric id from https://hf.co/metrics
  65. metric_type: str
  66. # Value of the metric.
  67. # Example: 20.0 or "20.0 ± 1.2"
  68. metric_value: Any
  69. # Optional
  70. # A pretty name for the task.
  71. # Example: Speech Recognition
  72. task_name: str | None = None
  73. # The name of the dataset configuration used in `load_dataset()`.
  74. # Example: fr in `load_dataset("common_voice", "fr")`.
  75. # See the `datasets` docs for more info:
  76. # https://huggingface.co/docs/datasets/package_reference/loading_methods#datasets.load_dataset.name
  77. dataset_config: str | None = None
  78. # The split used in `load_dataset()`.
  79. # Example: test
  80. dataset_split: str | None = None
  81. # The revision (AKA Git Sha) of the dataset used in `load_dataset()`.
  82. # Example: 5503434ddd753f426f4b38109466949a1217c2bb
  83. dataset_revision: str | None = None
  84. # The arguments passed during `Metric.compute()`.
  85. # Example for `bleu`: max_order: 4
  86. dataset_args: dict[str, Any] | None = None
  87. # A pretty name for the metric.
  88. # Example: Test WER
  89. metric_name: str | None = None
  90. # The name of the metric configuration used in `load_metric()`.
  91. # Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
  92. # See the `datasets` docs for more info: https://huggingface.co/docs/datasets/v2.1.0/en/loading#load-configurations
  93. metric_config: str | None = None
  94. # The arguments passed during `Metric.compute()`.
  95. # Example for `bleu`: max_order: 4
  96. metric_args: dict[str, Any] | None = None
  97. # Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
  98. verified: bool | None = None
  99. # A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
  100. verify_token: str | None = None
  101. # The name of the source of the evaluation result.
  102. # Example: Open LLM Leaderboard
  103. source_name: str | None = None
  104. # The URL of the source of the evaluation result.
  105. # Example: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard
  106. source_url: str | None = None
  107. @property
  108. def unique_identifier(self) -> tuple:
  109. """Returns a tuple that uniquely identifies this evaluation."""
  110. return (
  111. self.task_type,
  112. self.dataset_type,
  113. self.dataset_config,
  114. self.dataset_split,
  115. self.dataset_revision,
  116. )
  117. def is_equal_except_value(self, other: "EvalResult") -> bool:
  118. """
  119. Return True if `self` and `other` describe exactly the same metric but with a
  120. different value.
  121. """
  122. for key, _ in self.__dict__.items():
  123. if key == "metric_value":
  124. continue
  125. # For metrics computed by Hugging Face's evaluation service, `verify_token` is derived from `metric_value`,
  126. # so we exclude it here in the comparison.
  127. if key != "verify_token" and getattr(self, key) != getattr(other, key):
  128. return False
  129. return True
  130. def __post_init__(self) -> None:
  131. if self.source_name is not None and self.source_url is None:
  132. raise ValueError("If `source_name` is provided, `source_url` must also be provided.")
  133. @dataclass
  134. class CardData:
  135. """Structure containing metadata from a RepoCard.
  136. [`CardData`] is the parent class of [`ModelCardData`] and [`DatasetCardData`].
  137. Metadata can be exported as a dictionary or YAML. Export can be customized to alter the representation of the data
  138. (example: flatten evaluation results). `CardData` behaves as a dictionary (can get, pop, set values) but do not
  139. inherit from `dict` to allow this export step.
  140. """
  141. def __init__(self, ignore_metadata_errors: bool = False, **kwargs):
  142. self.__dict__.update(kwargs)
  143. def to_dict(self):
  144. """Converts CardData to a dict.
  145. Returns:
  146. `dict`: CardData represented as a dictionary ready to be dumped to a YAML
  147. block for inclusion in a README.md file.
  148. """
  149. data_dict = copy.deepcopy(self.__dict__)
  150. self._to_dict(data_dict)
  151. return {key: value for key, value in data_dict.items() if value is not None}
  152. def _to_dict(self, data_dict):
  153. """Use this method in child classes to alter the dict representation of the data. Alter the dict in-place.
  154. Args:
  155. data_dict (`dict`): The raw dict representation of the card data.
  156. """
  157. pass
  158. def to_yaml(self, line_break=None, original_order: list[str] | None = None) -> str:
  159. """Dumps CardData to a YAML block for inclusion in a README.md file.
  160. Args:
  161. line_break (str, *optional*):
  162. The line break to use when dumping to yaml.
  163. Returns:
  164. `str`: CardData represented as a YAML block.
  165. """
  166. if original_order:
  167. self.__dict__ = {
  168. k: self.__dict__[k]
  169. for k in original_order + list(set(self.__dict__.keys()) - set(original_order))
  170. if k in self.__dict__
  171. }
  172. return yaml_dump(self.to_dict(), sort_keys=False, line_break=line_break).strip()
  173. def __repr__(self):
  174. return repr(self.__dict__)
  175. def __str__(self):
  176. return self.to_yaml()
  177. def get(self, key: str, default: Any = None) -> Any:
  178. """Get value for a given metadata key."""
  179. value = self.__dict__.get(key)
  180. return default if value is None else value
  181. def pop(self, key: str, default: Any = None) -> Any:
  182. """Pop value for a given metadata key."""
  183. return self.__dict__.pop(key, default)
  184. def __getitem__(self, key: str) -> Any:
  185. """Get value for a given metadata key."""
  186. return self.__dict__[key]
  187. def __setitem__(self, key: str, value: Any) -> None:
  188. """Set value for a given metadata key."""
  189. self.__dict__[key] = value
  190. def __contains__(self, key: str) -> bool:
  191. """Check if a given metadata key is set."""
  192. return key in self.__dict__
  193. def __len__(self) -> int:
  194. """Return the number of metadata keys set."""
  195. return len(self.__dict__)
  196. def _validate_eval_results(
  197. eval_results: EvalResult | list[EvalResult] | None,
  198. model_name: str | None,
  199. ) -> list[EvalResult]:
  200. if eval_results is None:
  201. return []
  202. if isinstance(eval_results, EvalResult):
  203. eval_results = [eval_results]
  204. if not isinstance(eval_results, list) or not all(isinstance(r, EvalResult) for r in eval_results):
  205. raise ValueError(
  206. f"`eval_results` should be of type `EvalResult` or a list of `EvalResult`, got {type(eval_results)}."
  207. )
  208. if model_name is None:
  209. raise ValueError("Passing `eval_results` requires `model_name` to be set.")
  210. return eval_results
  211. class ModelCardData(CardData):
  212. """Model Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
  213. Args:
  214. base_model (`str` or `list[str]`, *optional*):
  215. The identifier of the base model from which the model derives. This is applicable for example if your model is a
  216. fine-tune or adapter of an existing model. The value must be the ID of a model on the Hub (or a list of IDs
  217. if your model derives from multiple models). Defaults to None.
  218. datasets (`Union[str, list[str]]`, *optional*):
  219. Dataset or list of datasets that were used to train this model. Should be a dataset ID
  220. found on https://hf.co/datasets. Defaults to None.
  221. eval_results (`Union[list[EvalResult], EvalResult]`, *optional*):
  222. List of `huggingface_hub.EvalResult` that define evaluation results of the model. If provided,
  223. `model_name` is used to as a name on PapersWithCode's leaderboards. Defaults to `None`.
  224. language (`Union[str, list[str]]`, *optional*):
  225. Language of model's training data or metadata. It must be an ISO 639-1, 639-2 or
  226. 639-3 code (two/three letters), or a special value like "code", "multilingual". Defaults to `None`.
  227. library_name (`str`, *optional*):
  228. Name of library used by this model. Example: keras or any library from
  229. https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries.ts.
  230. Defaults to None.
  231. license (`str`, *optional*):
  232. License of this model. Example: apache-2.0 or any license from
  233. https://huggingface.co/docs/hub/repositories-licenses. Defaults to None.
  234. license_name (`str`, *optional*):
  235. Name of the license of this model. Defaults to None. To be used in conjunction with `license_link`.
  236. Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a name. In that case, use `license` instead.
  237. license_link (`str`, *optional*):
  238. Link to the license of this model. Defaults to None. To be used in conjunction with `license_name`.
  239. Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a link. In that case, use `license` instead.
  240. metrics (`list[str]`, *optional*):
  241. List of metrics used to evaluate this model. Should be a metric name that can be found
  242. at https://hf.co/metrics. Example: 'accuracy'. Defaults to None.
  243. model_name (`str`, *optional*):
  244. A name for this model. It is used along with
  245. `eval_results` to construct the `model-index` within the card's metadata. The name
  246. you supply here is what will be used on PapersWithCode's leaderboards. If None is provided
  247. then the repo name is used as a default. Defaults to None.
  248. pipeline_tag (`str`, *optional*):
  249. The pipeline tag associated with the model. Example: "text-classification".
  250. tags (`list[str]`, *optional*):
  251. List of tags to add to your model that can be used when filtering on the Hugging
  252. Face Hub. Defaults to None.
  253. ignore_metadata_errors (`str`):
  254. If True, errors while parsing the metadata section will be ignored. Some information might be lost during
  255. the process. Use it at your own risk.
  256. kwargs (`dict`, *optional*):
  257. Additional metadata that will be added to the model card. Defaults to None.
  258. Example:
  259. ```python
  260. >>> from huggingface_hub import ModelCardData
  261. >>> card_data = ModelCardData(
  262. ... language="en",
  263. ... license="mit",
  264. ... library_name="timm",
  265. ... tags=['image-classification', 'resnet'],
  266. ... )
  267. >>> card_data.to_dict()
  268. {'language': 'en', 'license': 'mit', 'library_name': 'timm', 'tags': ['image-classification', 'resnet']}
  269. ```
  270. """
  271. def __init__(
  272. self,
  273. *,
  274. base_model: str | list[str] | None = None,
  275. datasets: str | list[str] | None = None,
  276. eval_results: list[EvalResult] | None = None,
  277. language: str | list[str] | None = None,
  278. library_name: str | None = None,
  279. license: str | None = None,
  280. license_name: str | None = None,
  281. license_link: str | None = None,
  282. metrics: list[str] | None = None,
  283. model_name: str | None = None,
  284. pipeline_tag: str | None = None,
  285. tags: list[str] | None = None,
  286. ignore_metadata_errors: bool = False,
  287. **kwargs,
  288. ):
  289. self.base_model = base_model
  290. self.datasets = datasets
  291. self.eval_results = eval_results
  292. self.language = language
  293. self.library_name = library_name
  294. self.license = license
  295. self.license_name = license_name
  296. self.license_link = license_link
  297. self.metrics = metrics
  298. self.model_name = model_name
  299. self.pipeline_tag = pipeline_tag
  300. self.tags = _to_unique_list(tags)
  301. model_index = kwargs.pop("model-index", None)
  302. if model_index:
  303. try:
  304. model_name, eval_results = model_index_to_eval_results(model_index)
  305. self.model_name = model_name
  306. self.eval_results = eval_results
  307. except (KeyError, TypeError) as error:
  308. if ignore_metadata_errors:
  309. logger.warning("Invalid model-index. Not loading eval results into CardData.")
  310. else:
  311. raise ValueError(
  312. f"Invalid `model_index` in metadata cannot be parsed: {error.__class__} {error}. Pass"
  313. " `ignore_metadata_errors=True` to ignore this error while loading a Model Card. Warning:"
  314. " some information will be lost. Use it at your own risk."
  315. )
  316. super().__init__(**kwargs)
  317. if self.eval_results:
  318. try:
  319. self.eval_results = _validate_eval_results(self.eval_results, self.model_name)
  320. except Exception as e:
  321. if ignore_metadata_errors:
  322. logger.warning(f"Failed to validate eval_results: {e}. Not loading eval results into CardData.")
  323. else:
  324. raise ValueError(f"Failed to validate eval_results: {e}") from e
  325. def _to_dict(self, data_dict):
  326. """Format the internal data dict. In this case, we convert eval results to a valid model index"""
  327. if self.eval_results is not None:
  328. data_dict["model-index"] = eval_results_to_model_index(self.model_name, self.eval_results) # type: ignore
  329. del data_dict["eval_results"], data_dict["model_name"]
  330. class DatasetCardData(CardData):
  331. """Dataset Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
  332. Args:
  333. language (`list[str]`, *optional*):
  334. Language of dataset's data or metadata. It must be an ISO 639-1, 639-2 or
  335. 639-3 code (two/three letters), or a special value like "code", "multilingual".
  336. license (`Union[str, list[str]]`, *optional*):
  337. License(s) of this dataset. Example: apache-2.0 or any license from
  338. https://huggingface.co/docs/hub/repositories-licenses.
  339. annotations_creators (`Union[str, list[str]]`, *optional*):
  340. How the annotations for the dataset were created.
  341. Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'no-annotation', 'other'.
  342. language_creators (`Union[str, list[str]]`, *optional*):
  343. How the text-based data in the dataset was created.
  344. Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'other'
  345. multilinguality (`Union[str, list[str]]`, *optional*):
  346. Whether the dataset is multilingual.
  347. Options are: 'monolingual', 'multilingual', 'translation', 'other'.
  348. size_categories (`Union[str, list[str]]`, *optional*):
  349. The number of examples in the dataset. Options are: 'n<1K', '1K<n<10K', '10K<n<100K',
  350. '100K<n<1M', '1M<n<10M', '10M<n<100M', '100M<n<1B', '1B<n<10B', '10B<n<100B', '100B<n<1T', 'n>1T', and 'other'.
  351. source_datasets (`list[str]]`, *optional*):
  352. Indicates whether the dataset is an original dataset or extended from another existing dataset.
  353. Options are: 'original' and 'extended'.
  354. task_categories (`Union[str, list[str]]`, *optional*):
  355. What categories of task does the dataset support?
  356. task_ids (`Union[str, list[str]]`, *optional*):
  357. What specific tasks does the dataset support?
  358. paperswithcode_id (`str`, *optional*):
  359. ID of the dataset on PapersWithCode.
  360. pretty_name (`str`, *optional*):
  361. A more human-readable name for the dataset. (ex. "Cats vs. Dogs")
  362. train_eval_index (`dict`, *optional*):
  363. A dictionary that describes the necessary spec for doing evaluation on the Hub.
  364. If not provided, it will be gathered from the 'train-eval-index' key of the kwargs.
  365. config_names (`Union[str, list[str]]`, *optional*):
  366. A list of the available dataset configs for the dataset.
  367. """
  368. def __init__(
  369. self,
  370. *,
  371. language: str | list[str] | None = None,
  372. license: str | list[str] | None = None,
  373. annotations_creators: str | list[str] | None = None,
  374. language_creators: str | list[str] | None = None,
  375. multilinguality: str | list[str] | None = None,
  376. size_categories: str | list[str] | None = None,
  377. source_datasets: list[str] | None = None,
  378. task_categories: str | list[str] | None = None,
  379. task_ids: str | list[str] | None = None,
  380. paperswithcode_id: str | None = None,
  381. pretty_name: str | None = None,
  382. train_eval_index: dict | None = None,
  383. config_names: str | list[str] | None = None,
  384. ignore_metadata_errors: bool = False,
  385. **kwargs,
  386. ):
  387. self.annotations_creators = annotations_creators
  388. self.language_creators = language_creators
  389. self.language = language
  390. self.license = license
  391. self.multilinguality = multilinguality
  392. self.size_categories = size_categories
  393. self.source_datasets = source_datasets
  394. self.task_categories = task_categories
  395. self.task_ids = task_ids
  396. self.paperswithcode_id = paperswithcode_id
  397. self.pretty_name = pretty_name
  398. self.config_names = config_names
  399. # TODO - maybe handle this similarly to EvalResult?
  400. self.train_eval_index = train_eval_index or kwargs.pop("train-eval-index", None)
  401. super().__init__(**kwargs)
  402. def _to_dict(self, data_dict):
  403. data_dict["train-eval-index"] = data_dict.pop("train_eval_index")
  404. class SpaceCardData(CardData):
  405. """Space Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
  406. To get an exhaustive reference of Spaces configuration, please visit https://huggingface.co/docs/hub/spaces-config-reference#spaces-configuration-reference.
  407. Args:
  408. title (`str`, *optional*)
  409. Title of the Space.
  410. sdk (`str`, *optional*)
  411. SDK of the Space (one of `gradio`, `streamlit`, `docker`, or `static`).
  412. sdk_version (`str`, *optional*)
  413. Version of the used SDK (if Gradio/Streamlit sdk).
  414. python_version (`str`, *optional*)
  415. Python version used in the Space (if Gradio/Streamlit sdk).
  416. app_file (`str`, *optional*)
  417. Path to your main application file (which contains either gradio or streamlit Python code, or static html code).
  418. Path is relative to the root of the repository.
  419. app_port (`str`, *optional*)
  420. Port on which your application is running. Used only if sdk is `docker`.
  421. license (`str`, *optional*)
  422. License of this model. Example: apache-2.0 or any license from
  423. https://huggingface.co/docs/hub/repositories-licenses.
  424. duplicated_from (`str`, *optional*)
  425. ID of the original Space if this is a duplicated Space.
  426. models (list[`str`], *optional*)
  427. List of models related to this Space. Should be a dataset ID found on https://hf.co/models.
  428. datasets (`list[str]`, *optional*)
  429. List of datasets related to this Space. Should be a dataset ID found on https://hf.co/datasets.
  430. tags (`list[str]`, *optional*)
  431. List of tags to add to your Space that can be used when filtering on the Hub.
  432. ignore_metadata_errors (`str`):
  433. If True, errors while parsing the metadata section will be ignored. Some information might be lost during
  434. the process. Use it at your own risk.
  435. kwargs (`dict`, *optional*):
  436. Additional metadata that will be added to the space card.
  437. Example:
  438. ```python
  439. >>> from huggingface_hub import SpaceCardData
  440. >>> card_data = SpaceCardData(
  441. ... title="Dreambooth Training",
  442. ... license="mit",
  443. ... sdk="gradio",
  444. ... duplicated_from="multimodalart/dreambooth-training"
  445. ... )
  446. >>> card_data.to_dict()
  447. {'title': 'Dreambooth Training', 'sdk': 'gradio', 'license': 'mit', 'duplicated_from': 'multimodalart/dreambooth-training'}
  448. ```
  449. """
  450. def __init__(
  451. self,
  452. *,
  453. title: str | None = None,
  454. sdk: str | None = None,
  455. sdk_version: str | None = None,
  456. python_version: str | None = None,
  457. app_file: str | None = None,
  458. app_port: int | None = None,
  459. license: str | None = None,
  460. duplicated_from: str | None = None,
  461. models: list[str] | None = None,
  462. datasets: list[str] | None = None,
  463. tags: list[str] | None = None,
  464. ignore_metadata_errors: bool = False,
  465. **kwargs,
  466. ):
  467. self.title = title
  468. self.sdk = sdk
  469. self.sdk_version = sdk_version
  470. self.python_version = python_version
  471. self.app_file = app_file
  472. self.app_port = app_port
  473. self.license = license
  474. self.duplicated_from = duplicated_from
  475. self.models = models
  476. self.datasets = datasets
  477. self.tags = _to_unique_list(tags)
  478. super().__init__(**kwargs)
  479. def model_index_to_eval_results(model_index: list[dict[str, Any]]) -> tuple[str, list[EvalResult]]:
  480. """Takes in a model index and returns the model name and a list of `huggingface_hub.EvalResult` objects.
  481. A detailed spec of the model index can be found here:
  482. https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
  483. Args:
  484. model_index (`list[dict[str, Any]]`):
  485. A model index data structure, likely coming from a README.md file on the
  486. Hugging Face Hub.
  487. Returns:
  488. model_name (`str`):
  489. The name of the model as found in the model index. This is used as the
  490. identifier for the model on leaderboards like PapersWithCode.
  491. eval_results (`list[EvalResult]`):
  492. A list of `huggingface_hub.EvalResult` objects containing the metrics
  493. reported in the provided model_index.
  494. Example:
  495. ```python
  496. >>> from huggingface_hub.repocard_data import model_index_to_eval_results
  497. >>> # Define a minimal model index
  498. >>> model_index = [
  499. ... {
  500. ... "name": "my-cool-model",
  501. ... "results": [
  502. ... {
  503. ... "task": {
  504. ... "type": "image-classification"
  505. ... },
  506. ... "dataset": {
  507. ... "type": "beans",
  508. ... "name": "Beans"
  509. ... },
  510. ... "metrics": [
  511. ... {
  512. ... "type": "accuracy",
  513. ... "value": 0.9
  514. ... }
  515. ... ]
  516. ... }
  517. ... ]
  518. ... }
  519. ... ]
  520. >>> model_name, eval_results = model_index_to_eval_results(model_index)
  521. >>> model_name
  522. 'my-cool-model'
  523. >>> eval_results[0].task_type
  524. 'image-classification'
  525. >>> eval_results[0].metric_type
  526. 'accuracy'
  527. ```
  528. """
  529. eval_results = []
  530. for elem in model_index:
  531. name = elem["name"]
  532. results = elem["results"]
  533. for result in results:
  534. task_type = result["task"]["type"]
  535. task_name = result["task"].get("name")
  536. dataset_type = result["dataset"]["type"]
  537. dataset_name = result["dataset"]["name"]
  538. dataset_config = result["dataset"].get("config")
  539. dataset_split = result["dataset"].get("split")
  540. dataset_revision = result["dataset"].get("revision")
  541. dataset_args = result["dataset"].get("args")
  542. source_name = result.get("source", {}).get("name")
  543. source_url = result.get("source", {}).get("url")
  544. for metric in result["metrics"]:
  545. metric_type = metric["type"]
  546. metric_value = metric["value"]
  547. metric_name = metric.get("name")
  548. metric_args = metric.get("args")
  549. metric_config = metric.get("config")
  550. verified = metric.get("verified")
  551. verify_token = metric.get("verifyToken")
  552. eval_result = EvalResult(
  553. task_type=task_type, # Required
  554. dataset_type=dataset_type, # Required
  555. dataset_name=dataset_name, # Required
  556. metric_type=metric_type, # Required
  557. metric_value=metric_value, # Required
  558. task_name=task_name,
  559. dataset_config=dataset_config,
  560. dataset_split=dataset_split,
  561. dataset_revision=dataset_revision,
  562. dataset_args=dataset_args,
  563. metric_name=metric_name,
  564. metric_args=metric_args,
  565. metric_config=metric_config,
  566. verified=verified,
  567. verify_token=verify_token,
  568. source_name=source_name,
  569. source_url=source_url,
  570. )
  571. eval_results.append(eval_result)
  572. return name, eval_results
  573. def _remove_none(obj):
  574. """
  575. Recursively remove `None` values from a dict. Borrowed from: https://stackoverflow.com/a/20558778
  576. """
  577. if isinstance(obj, (list, tuple, set)):
  578. return type(obj)(_remove_none(x) for x in obj if x is not None)
  579. elif isinstance(obj, dict):
  580. return type(obj)((_remove_none(k), _remove_none(v)) for k, v in obj.items() if k is not None and v is not None)
  581. else:
  582. return obj
  583. def eval_results_to_model_index(model_name: str, eval_results: list[EvalResult]) -> list[dict[str, Any]]:
  584. """Takes in given model name and list of `huggingface_hub.EvalResult` and returns a
  585. valid model-index that will be compatible with the format expected by the
  586. Hugging Face Hub.
  587. Args:
  588. model_name (`str`):
  589. Name of the model (ex. "my-cool-model"). This is used as the identifier
  590. for the model on leaderboards like PapersWithCode.
  591. eval_results (`list[EvalResult]`):
  592. List of `huggingface_hub.EvalResult` objects containing the metrics to be
  593. reported in the model-index.
  594. Returns:
  595. model_index (`list[dict[str, Any]]`): The eval_results converted to a model-index.
  596. Example:
  597. ```python
  598. >>> from huggingface_hub.repocard_data import eval_results_to_model_index, EvalResult
  599. >>> # Define minimal eval_results
  600. >>> eval_results = [
  601. ... EvalResult(
  602. ... task_type="image-classification", # Required
  603. ... dataset_type="beans", # Required
  604. ... dataset_name="Beans", # Required
  605. ... metric_type="accuracy", # Required
  606. ... metric_value=0.9, # Required
  607. ... )
  608. ... ]
  609. >>> eval_results_to_model_index("my-cool-model", eval_results)
  610. [{'name': 'my-cool-model', 'results': [{'task': {'type': 'image-classification'}, 'dataset': {'name': 'Beans', 'type': 'beans'}, 'metrics': [{'type': 'accuracy', 'value': 0.9}]}]}]
  611. ```
  612. """
  613. # Metrics are reported on a unique task-and-dataset basis.
  614. # Here, we make a map of those pairs and the associated EvalResults.
  615. task_and_ds_types_map: dict[Any, list[EvalResult]] = defaultdict(list)
  616. for eval_result in eval_results:
  617. task_and_ds_types_map[eval_result.unique_identifier].append(eval_result)
  618. # Use the map from above to generate the model index data.
  619. model_index_data: list[dict[str, Any]] = []
  620. for results in task_and_ds_types_map.values():
  621. # All items from `results` share same metadata
  622. sample_result = results[0]
  623. data: dict[str, Any] = {
  624. "task": {
  625. "type": sample_result.task_type,
  626. "name": sample_result.task_name,
  627. },
  628. "dataset": {
  629. "name": sample_result.dataset_name,
  630. "type": sample_result.dataset_type,
  631. "config": sample_result.dataset_config,
  632. "split": sample_result.dataset_split,
  633. "revision": sample_result.dataset_revision,
  634. "args": sample_result.dataset_args,
  635. },
  636. "metrics": [
  637. {
  638. "type": result.metric_type,
  639. "value": result.metric_value,
  640. "name": result.metric_name,
  641. "config": result.metric_config,
  642. "args": result.metric_args,
  643. "verified": result.verified,
  644. "verifyToken": result.verify_token,
  645. }
  646. for result in results
  647. ],
  648. }
  649. if sample_result.source_url is not None:
  650. source: dict[str, str] = {
  651. "url": sample_result.source_url,
  652. }
  653. if sample_result.source_name is not None:
  654. source["name"] = sample_result.source_name
  655. data["source"] = source
  656. model_index_data.append(data)
  657. # TODO - Check if there cases where this list is longer than one?
  658. # Finally, the model index itself is list of dicts.
  659. model_index = [
  660. {
  661. "name": model_name,
  662. "results": model_index_data,
  663. }
  664. ]
  665. return _remove_none(model_index)
  666. def _to_unique_list(tags: list[str] | None) -> list[str] | None:
  667. if tags is None:
  668. return tags
  669. unique_tags = [] # make tags unique + keep order explicitly
  670. for tag in tags:
  671. if tag not in unique_tags:
  672. unique_tags.append(tag)
  673. return unique_tags