ms_ssim.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. # LICENSE HEADER MANAGED BY add-license-header
  2. #
  3. # Copyright 2018 Kornia Team
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. from __future__ import annotations
  18. from typing import Optional, Sequence
  19. import torch
  20. import torch.nn.functional as F
  21. from torch import nn
  22. # Based on:
  23. # https://github.com/psyrocloud/MS-SSIM_L1_LOSS
  24. class MS_SSIMLoss(nn.Module):
  25. r"""Creates a criterion that computes MSSIM + L1 loss.
  26. According to [1], we compute the MS_SSIM + L1 loss as follows:
  27. .. math::
  28. \text{loss}(x, y) = \alpha \cdot \mathcal{L_{MSSIM}}(x,y)+(1 - \alpha) \cdot G_\alpha \cdot \mathcal{L_1}(x,y)
  29. Where:
  30. - :math:`\alpha` is the weight parameter.
  31. - :math:`x` and :math:`y` are the reconstructed and true reference images.
  32. - :math:`\mathcal{L_{MSSIM}}` is the MS-SSIM loss.
  33. - :math:`G_\alpha` is the sigma values for computing multi-scale SSIM.
  34. - :math:`\mathcal{L_1}` is the L1 loss.
  35. Reference:
  36. [1]: https://research.nvidia.com/sites/default/files/pubs/2017-03_Loss-Functions-for/NN_ImgProc.pdf#page11
  37. Args:
  38. sigmas: gaussian sigma values.
  39. data_range: the range of the images.
  40. K: k values.
  41. alpha : specifies the alpha value
  42. compensation: specifies the scaling coefficient.
  43. reduction : Specifies the reduction to apply to the
  44. output: ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
  45. ``'mean'``: the sum of the output will be divided by the number of elements
  46. in the output, ``'sum'``: the output will be summed.
  47. Returns:
  48. The computed loss.
  49. Shape:
  50. - Input1: :math:`(N, C, H, W)`.
  51. - Input2: :math:`(N, C, H, W)`.
  52. - Output: :math:`(N, H, W)` or scalar if reduction is set to ``'mean'`` or ``'sum'``.
  53. Examples:
  54. >>> input1 = torch.rand(1, 3, 5, 5)
  55. >>> input2 = torch.rand(1, 3, 5, 5)
  56. >>> criterion = kornia.losses.MS_SSIMLoss()
  57. >>> loss = criterion(input1, input2)
  58. """
  59. def __init__(
  60. self,
  61. sigmas: Sequence[float] = (0.5, 1.0, 2.0, 4.0, 8.0),
  62. data_range: float = 1.0,
  63. K: tuple[float, float] = (0.01, 0.03),
  64. alpha: float = 0.025,
  65. compensation: float = 200.0,
  66. reduction: str = "mean",
  67. ) -> None:
  68. super().__init__()
  69. self.DR: float = data_range
  70. self.C1: float = (K[0] * data_range) ** 2
  71. self.C2: float = (K[1] * data_range) ** 2
  72. self.pad = int(2 * sigmas[-1])
  73. self.alpha: float = alpha
  74. self.compensation: float = compensation
  75. self.reduction: str = reduction
  76. # Set filter size
  77. filter_size = int(4 * sigmas[-1] + 1)
  78. g_masks = torch.zeros((3 * len(sigmas), 1, filter_size, filter_size))
  79. # Compute mask at different scales
  80. for idx, sigma in enumerate(sigmas):
  81. g_masks[3 * idx + 0, 0, :, :] = self._fspecial_gauss_2d(filter_size, sigma)
  82. g_masks[3 * idx + 1, 0, :, :] = self._fspecial_gauss_2d(filter_size, sigma)
  83. g_masks[3 * idx + 2, 0, :, :] = self._fspecial_gauss_2d(filter_size, sigma)
  84. self.register_buffer("_g_masks", g_masks)
  85. def _fspecial_gauss_1d(
  86. self, size: int, sigma: float, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None
  87. ) -> torch.Tensor:
  88. """Create 1-D gauss kernel.
  89. Args:
  90. size: the size of gauss kernel.
  91. sigma: sigma of normal distribution.
  92. device: device to store the result on.
  93. dtype: dtype of the result.
  94. Returns:
  95. 1D kernel (size).
  96. """
  97. coords = torch.arange(size, device=device, dtype=dtype)
  98. coords -= size // 2
  99. g = torch.exp(-(coords**2) / (2 * sigma**2))
  100. g /= g.sum()
  101. return g.reshape(-1)
  102. def _fspecial_gauss_2d(
  103. self, size: int, sigma: float, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None
  104. ) -> torch.Tensor:
  105. """Create 2-D gauss kernel.
  106. Args:
  107. size: the size of gauss kernel.
  108. sigma: sigma of normal distribution.
  109. device: device to store the result on.
  110. dtype: dtype of the result.
  111. Returns:
  112. 2D kernel (size x size).
  113. """
  114. gaussian_vec = self._fspecial_gauss_1d(size, sigma, device, dtype)
  115. return torch.outer(gaussian_vec, gaussian_vec)
  116. def forward(self, img1: torch.Tensor, img2: torch.Tensor) -> torch.Tensor:
  117. """Compute MS_SSIM loss.
  118. Args:
  119. img1: the predicted image with shape :math:`(B, C, H, W)`.
  120. img2: the target image with a shape of :math:`(B, C, H, W)`.
  121. Returns:
  122. Estimated MS-SSIM_L1 loss.
  123. """
  124. if not isinstance(img1, torch.Tensor):
  125. raise TypeError(f"Input type is not a torch.Tensor. Got {type(img1)}")
  126. if not isinstance(img2, torch.Tensor):
  127. raise TypeError(f"Output type is not a torch.Tensor. Got {type(img2)}")
  128. if not len(img1.shape) == len(img2.shape):
  129. raise ValueError(f"Input shapes should be same. Got {type(img1)} and {type(img2)}.")
  130. g_masks: torch.Tensor = torch.jit.annotate(torch.Tensor, self._g_masks)
  131. CH: int = img1.shape[-3]
  132. mux = F.conv2d(img1, g_masks, groups=CH, padding=self.pad)
  133. muy = F.conv2d(img2, g_masks, groups=CH, padding=self.pad)
  134. mux2 = mux * mux
  135. muy2 = muy * muy
  136. muxy = mux * muy
  137. sigmax2 = F.conv2d(img1 * img1, g_masks, groups=CH, padding=self.pad) - mux2
  138. sigmay2 = F.conv2d(img2 * img2, g_masks, groups=CH, padding=self.pad) - muy2
  139. sigmaxy = F.conv2d(img1 * img2, g_masks, groups=CH, padding=self.pad) - muxy
  140. lc = (2 * muxy + self.C1) / (mux2 + muy2 + self.C1)
  141. cs = (2 * sigmaxy + self.C2) / (sigmax2 + sigmay2 + self.C2)
  142. lM = lc[:, -1, :, :] * lc[:, -2, :, :] * lc[:, -3, :, :]
  143. PIcs = cs.prod(dim=1)
  144. # Compute MS-SSIM loss
  145. loss_ms_ssim = 1 - lM * PIcs
  146. # TODO: pass pointer to function e.g. to make more custom with mse, cosine, etc.
  147. # Compute L1 loss
  148. loss_l1 = F.l1_loss(img1, img2, reduction="none")
  149. # Compute average l1 loss in 3 channels
  150. gaussian_l1 = F.conv2d(loss_l1, g_masks[-CH:], groups=CH, padding=self.pad).mean(1)
  151. # Compute MS-SSIM + L1 loss
  152. loss = self.alpha * loss_ms_ssim + (1 - self.alpha) * gaussian_l1 / self.DR
  153. loss = self.compensation * loss
  154. if self.reduction == "mean":
  155. loss = torch.mean(loss)
  156. elif self.reduction == "sum":
  157. loss = torch.sum(loss)
  158. elif self.reduction == "none":
  159. pass
  160. else:
  161. raise NotImplementedError(f"Invalid reduction mode: {self.reduction}")
  162. return loss