bootstrapping.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. # Copyright The Lightning team.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from collections.abc import Sequence
  15. from copy import deepcopy
  16. from typing import Any, Optional, Union, cast
  17. import torch
  18. from lightning_utilities import apply_to_collection
  19. from torch import Tensor
  20. from torch.nn import ModuleList
  21. from torchmetrics.metric import Metric
  22. from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE
  23. from torchmetrics.utilities.plot import _AX_TYPE, _PLOT_OUT_TYPE
  24. from torchmetrics.wrappers.abstract import WrapperMetric
# The ``BootStrapper.plot`` docstring examples call ``plot``, which needs matplotlib; list it for skipping
# when matplotlib is unavailable (``__doctest_skip__`` is presumably read by the doctest runner -- confirm).
if not _MATPLOTLIB_AVAILABLE:
    __doctest_skip__ = ["BootStrapper.plot"]
  27. def _bootstrap_sampler(
  28. size: int,
  29. sampling_strategy: str = "poisson",
  30. ) -> torch.Tensor:
  31. """Resample a tensor along its first dimension with replacement.
  32. Args:
  33. size: number of samples
  34. sampling_strategy: the strategy to use for sampling, either ``'poisson'`` or ``'multinomial'``
  35. Returns:
  36. resampled tensor
  37. """
  38. if sampling_strategy == "poisson":
  39. p = torch.distributions.Poisson(1)
  40. n = p.sample((size,))
  41. return torch.arange(size).repeat_interleave(n.long(), dim=0)
  42. if sampling_strategy == "multinomial":
  43. return torch.multinomial(torch.ones(size), num_samples=size, replacement=True)
  44. raise ValueError("Unknown sampling strategy")
  45. class BootStrapper(WrapperMetric):
  46. r"""Using `Turn a Metric into a Bootstrapped`_.
  47. That can automate the process of getting confidence intervals for metric values. This wrapper
  48. class basically keeps multiple copies of the same base metric in memory and whenever ``update`` or
  49. ``forward`` is called, all input tensors are resampled (with replacement) along the first dimension.
  50. Args:
  51. base_metric: base metric class to wrap
  52. num_bootstraps: number of copies to make of the base metric for bootstrapping
  53. mean: if ``True`` return the mean of the bootstraps
  54. std: if ``True`` return the standard deviation of the bootstraps
  55. quantile: if given, returns the quantile of the bootstraps. Can only be used with pytorch version 1.6 or higher
  56. raw: if ``True``, return all bootstrapped values
  57. sampling_strategy:
  58. Determines how to produce bootstrapped samplings. Either ``'poisson'`` or ``multinomial``.
  59. If ``'possion'`` is chosen, the number of times each sample will be included in the bootstrap
  60. will be given by :math:`n\sim Poisson(\lambda=1)`, which approximates the true bootstrap distribution
  61. when the number of samples is large. If ``'multinomial'`` is chosen, we will apply true bootstrapping
  62. at the batch level to approximate bootstrapping over the hole dataset.
  63. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.
  64. Example::
  65. >>> from pprint import pprint
  66. >>> from torch import randint
  67. >>> from torchmetrics.wrappers import BootStrapper
  68. >>> from torchmetrics.classification import MulticlassAccuracy
  69. >>> base_metric = MulticlassAccuracy(num_classes=5, average='micro')
  70. >>> bootstrap = BootStrapper(base_metric, num_bootstraps=20)
  71. >>> bootstrap.update(randint(5, (20,)), randint(5, (20,)))
  72. >>> output = bootstrap.compute()
  73. >>> pprint(output)
  74. {'mean': tensor(0.2089), 'std': tensor(0.0772)}
  75. """
  76. full_state_update: Optional[bool] = True
  77. def __init__(
  78. self,
  79. base_metric: Metric,
  80. num_bootstraps: int = 10,
  81. mean: bool = True,
  82. std: bool = True,
  83. quantile: Optional[Union[float, Tensor]] = None,
  84. raw: bool = False,
  85. sampling_strategy: str = "poisson",
  86. **kwargs: Any,
  87. ) -> None:
  88. super().__init__(**kwargs)
  89. if not isinstance(base_metric, Metric):
  90. raise ValueError(
  91. f"Expected base metric to be an instance of torchmetrics.Metric but received {base_metric}"
  92. )
  93. self.metrics = ModuleList([deepcopy(base_metric) for _ in range(num_bootstraps)])
  94. self.num_bootstraps = num_bootstraps
  95. self.mean = mean
  96. self.std = std
  97. self.quantile = quantile
  98. self.raw = raw
  99. allowed_sampling = ("poisson", "multinomial")
  100. if sampling_strategy not in allowed_sampling:
  101. raise ValueError(
  102. f"Expected argument ``sampling_strategy`` to be one of {allowed_sampling}"
  103. f" but received {sampling_strategy}"
  104. )
  105. self.sampling_strategy = sampling_strategy
  106. def update(self, *args: Any, **kwargs: Any) -> None:
  107. """Update the state of the base metric.
  108. Any tensor passed in will be bootstrapped along dimension 0.
  109. """
  110. args_sizes = apply_to_collection(args, torch.Tensor, len)
  111. kwargs_sizes = apply_to_collection(kwargs, torch.Tensor, len)
  112. if len(args_sizes) > 0:
  113. size = args_sizes[0]
  114. elif len(kwargs_sizes) > 0:
  115. size = next(iter(kwargs_sizes.values()))
  116. else:
  117. raise ValueError("None of the input contained tensors, so could not determine the sampling size")
  118. for idx in range(self.num_bootstraps):
  119. sample_idx = _bootstrap_sampler(size, sampling_strategy=self.sampling_strategy).to(self.device)
  120. if sample_idx.numel() == 0:
  121. continue
  122. new_args = apply_to_collection(args, torch.Tensor, torch.index_select, dim=0, index=sample_idx)
  123. new_kwargs = apply_to_collection(kwargs, torch.Tensor, torch.index_select, dim=0, index=sample_idx)
  124. self.metrics[idx].update(*new_args, **new_kwargs) # type: ignore[operator] # needed for mypy
  125. def compute(self) -> dict[str, Tensor]:
  126. """Compute the bootstrapped metric values.
  127. Always returns a dict of tensors, which can contain the following keys: ``mean``, ``std``, ``quantile`` and
  128. ``raw`` depending on how the class was initialized.
  129. """
  130. computed_vals = torch.stack([cast(Metric, m).compute() for m in self.metrics], dim=0)
  131. output_dict = {}
  132. if self.mean:
  133. output_dict["mean"] = computed_vals.mean(dim=0)
  134. if self.std:
  135. output_dict["std"] = computed_vals.std(dim=0)
  136. if self.quantile is not None:
  137. output_dict["quantile"] = torch.quantile(computed_vals, self.quantile)
  138. if self.raw:
  139. output_dict["raw"] = computed_vals
  140. return output_dict
  141. def forward(self, *args: Any, **kwargs: Any) -> Any:
  142. """Use the original forward method of the base metric class."""
  143. return super(WrapperMetric, self).forward(*args, **kwargs)
  144. def reset(self) -> None:
  145. """Reset the state of the base metric."""
  146. for m in self.metrics:
  147. m = cast(Metric, m)
  148. m.reset()
  149. super().reset()
  150. def plot(
  151. self, val: Optional[Union[Tensor, Sequence[Tensor]]] = None, ax: Optional[_AX_TYPE] = None
  152. ) -> _PLOT_OUT_TYPE:
  153. """Plot a single or multiple values from the metric.
  154. Args:
  155. val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results.
  156. If no value is provided, will automatically call `metric.compute` and plot that result.
  157. ax: An matplotlib axis object. If provided will add plot to that axis
  158. Returns:
  159. Figure and Axes object
  160. Raises:
  161. ModuleNotFoundError:
  162. If `matplotlib` is not installed
  163. .. plot::
  164. :scale: 75
  165. >>> # Example plotting a single value
  166. >>> import torch
  167. >>> from torchmetrics.wrappers import BootStrapper
  168. >>> from torchmetrics.regression import MeanSquaredError
  169. >>> metric = BootStrapper(MeanSquaredError(), num_bootstraps=20)
  170. >>> metric.update(torch.randn(100,), torch.randn(100,))
  171. >>> fig_, ax_ = metric.plot()
  172. .. plot::
  173. :scale: 75
  174. >>> # Example plotting multiple values
  175. >>> import torch
  176. >>> from torchmetrics.wrappers import BootStrapper
  177. >>> from torchmetrics.regression import MeanSquaredError
  178. >>> metric = BootStrapper(MeanSquaredError(), num_bootstraps=20)
  179. >>> values = [ ]
  180. >>> for _ in range(3):
  181. ... values.append(metric(torch.randn(100,), torch.randn(100,)))
  182. >>> fig_, ax_ = metric.plot(values)
  183. """
  184. return self._plot(val, ax)