responses.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. # LICENSE HEADER MANAGED BY add-license-header
  2. #
  3. # Copyright 2018 Kornia Team
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. from typing import Optional, Union
  18. import torch
  19. from kornia.core import Module, Tensor, tensor
  20. from kornia.core.check import KORNIA_CHECK_SHAPE
  21. from kornia.filters import gaussian_blur2d, spatial_gradient
  22. def _get_kernel_size(sigma: float) -> int:
  23. ksize = int(2.0 * 4.0 * sigma + 1.0)
  24. # matches OpenCV, but may cause padding problem for small images
  25. # PyTorch does not allow to pad more than original size.
  26. # Therefore there is a hack in forward function
  27. if ksize % 2 == 0:
  28. ksize += 1
  29. return ksize
  30. def harris_response(
  31. input: Tensor, k: Union[Tensor, float] = 0.04, grads_mode: str = "sobel", sigmas: Optional[Tensor] = None
  32. ) -> Tensor:
  33. r"""Compute the Harris cornerness function.
  34. .. image:: _static/img/harris_response.png
  35. Function does not do any normalization or nms. The response map is computed according the following formulation:
  36. .. math::
  37. R = max(0, det(M) - k \cdot trace(M)^2)
  38. where:
  39. .. math::
  40. M = \sum_{(x,y) \in W}
  41. \begin{bmatrix}
  42. I^{2}_x & I_x I_y \\
  43. I_x I_y & I^{2}_y \\
  44. \end{bmatrix}
  45. and :math:`k` is an empirically determined constant
  46. :math:`k ∈ [ 0.04 , 0.06 ]`
  47. Args:
  48. input: input image with shape :math:`(B, C, H, W)`.
  49. k: the Harris detector free parameter.
  50. grads_mode: can be ``'sobel'`` for standalone use or ``'diff'`` for use on Gaussian pyramid.
  51. sigmas: coefficients to be multiplied by multichannel response. Should be shape of :math:`(B)`
  52. It is necessary for performing non-maxima-suppression across different scale pyramid levels.
  53. See `vlfeat <https://github.com/vlfeat/vlfeat/blob/master/vl/covdet.c#L874>`_.
  54. Return:
  55. the response map per channel with shape :math:`(B, C, H, W)`.
  56. Example:
  57. >>> input = torch.tensor([[[
  58. ... [0., 0., 0., 0., 0., 0., 0.],
  59. ... [0., 1., 1., 1., 1., 1., 0.],
  60. ... [0., 1., 1., 1., 1., 1., 0.],
  61. ... [0., 1., 1., 1., 1., 1., 0.],
  62. ... [0., 1., 1., 1., 1., 1., 0.],
  63. ... [0., 1., 1., 1., 1., 1., 0.],
  64. ... [0., 0., 0., 0., 0., 0., 0.],
  65. ... ]]]) # 1x1x7x7
  66. >>> # compute the response map
  67. harris_response(input, 0.04)
  68. tensor([[[[0.0012, 0.0039, 0.0020, 0.0000, 0.0020, 0.0039, 0.0012],
  69. [0.0039, 0.0065, 0.0040, 0.0000, 0.0040, 0.0065, 0.0039],
  70. [0.0020, 0.0040, 0.0029, 0.0000, 0.0029, 0.0040, 0.0020],
  71. [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
  72. [0.0020, 0.0040, 0.0029, 0.0000, 0.0029, 0.0040, 0.0020],
  73. [0.0039, 0.0065, 0.0040, 0.0000, 0.0040, 0.0065, 0.0039],
  74. [0.0012, 0.0039, 0.0020, 0.0000, 0.0020, 0.0039, 0.0012]]]])
  75. """
  76. # TODO: Recompute doctest
  77. KORNIA_CHECK_SHAPE(input, ["B", "C", "H", "W"])
  78. if sigmas is not None:
  79. if not isinstance(sigmas, Tensor):
  80. raise TypeError(f"sigmas type is not a Tensor. Got {type(sigmas)}")
  81. if (not len(sigmas.shape) == 1) or (sigmas.size(0) != input.size(0)):
  82. raise ValueError(f"Invalid sigmas shape, we expect B == input.size(0). Got: {sigmas.shape}")
  83. gradients: Tensor = spatial_gradient(input, grads_mode)
  84. dx: Tensor = gradients[:, :, 0]
  85. dy: Tensor = gradients[:, :, 1]
  86. # compute the structure tensor M elements
  87. dx2: Tensor = gaussian_blur2d(dx**2, (7, 7), (1.0, 1.0))
  88. dy2: Tensor = gaussian_blur2d(dy**2, (7, 7), (1.0, 1.0))
  89. dxy: Tensor = gaussian_blur2d(dx * dy, (7, 7), (1.0, 1.0))
  90. det_m: Tensor = dx2 * dy2 - dxy * dxy
  91. trace_m: Tensor = dx2 + dy2
  92. # compute the response map
  93. scores: Tensor = det_m - k * (trace_m**2)
  94. if sigmas is not None:
  95. scores = scores * sigmas.pow(4).view(-1, 1, 1, 1)
  96. return scores
  97. def gftt_response(input: Tensor, grads_mode: str = "sobel", sigmas: Optional[Tensor] = None) -> Tensor:
  98. r"""Compute the Shi-Tomasi cornerness function.
  99. .. image:: _static/img/gftt_response.png
  100. Function does not do any normalization or nms. The response map is computed according the following formulation:
  101. .. math::
  102. R = min(eig(M))
  103. where:
  104. .. math::
  105. M = \sum_{(x,y) \in W}
  106. \begin{bmatrix}
  107. I^{2}_x & I_x I_y \\
  108. I_x I_y & I^{2}_y \\
  109. \end{bmatrix}
  110. Args:
  111. input: input image with shape :math:`(B, C, H, W)`.
  112. grads_mode: can be ``'sobel'`` for standalone use or ``'diff'`` for use on Gaussian pyramid.
  113. sigmas: coefficients to be multiplied by multichannel response. Should be shape of :math:`(B)`
  114. It is necessary for performing non-maxima-suppression across different scale pyramid levels.
  115. See `vlfeat <https://github.com/vlfeat/vlfeat/blob/master/vl/covdet.c#L874>`_.
  116. Return:
  117. the response map per channel with shape :math:`(B, C, H, W)`.
  118. Example:
  119. >>> input = torch.tensor([[[
  120. ... [0., 0., 0., 0., 0., 0., 0.],
  121. ... [0., 1., 1., 1., 1., 1., 0.],
  122. ... [0., 1., 1., 1., 1., 1., 0.],
  123. ... [0., 1., 1., 1., 1., 1., 0.],
  124. ... [0., 1., 1., 1., 1., 1., 0.],
  125. ... [0., 1., 1., 1., 1., 1., 0.],
  126. ... [0., 0., 0., 0., 0., 0., 0.],
  127. ... ]]]) # 1x1x7x7
  128. >>> # compute the response map
  129. gftt_response(input)
  130. tensor([[[[0.0155, 0.0334, 0.0194, 0.0000, 0.0194, 0.0334, 0.0155],
  131. [0.0334, 0.0575, 0.0339, 0.0000, 0.0339, 0.0575, 0.0334],
  132. [0.0194, 0.0339, 0.0497, 0.0000, 0.0497, 0.0339, 0.0194],
  133. [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
  134. [0.0194, 0.0339, 0.0497, 0.0000, 0.0497, 0.0339, 0.0194],
  135. [0.0334, 0.0575, 0.0339, 0.0000, 0.0339, 0.0575, 0.0334],
  136. [0.0155, 0.0334, 0.0194, 0.0000, 0.0194, 0.0334, 0.0155]]]])
  137. """
  138. # TODO: Recompute doctest
  139. KORNIA_CHECK_SHAPE(input, ["B", "C", "H", "W"])
  140. gradients: Tensor = spatial_gradient(input, grads_mode)
  141. dx: Tensor = gradients[:, :, 0]
  142. dy: Tensor = gradients[:, :, 1]
  143. dx2: Tensor = gaussian_blur2d(dx**2, (7, 7), (1.0, 1.0))
  144. dy2: Tensor = gaussian_blur2d(dy**2, (7, 7), (1.0, 1.0))
  145. dxy: Tensor = gaussian_blur2d(dx * dy, (7, 7), (1.0, 1.0))
  146. det_m: Tensor = dx2 * dy2 - dxy * dxy
  147. trace_m: Tensor = dx2 + dy2
  148. e1: Tensor = 0.5 * (trace_m + torch.sqrt((trace_m**2 - 4 * det_m).abs()))
  149. e2: Tensor = 0.5 * (trace_m - torch.sqrt((trace_m**2 - 4 * det_m).abs()))
  150. scores: Tensor = torch.min(e1, e2)
  151. if sigmas is not None:
  152. scores = scores * sigmas.pow(4).view(-1, 1, 1, 1)
  153. return scores
  154. def hessian_response(input: Tensor, grads_mode: str = "sobel", sigmas: Optional[Tensor] = None) -> Tensor:
  155. r"""Compute the absolute of determinant of the Hessian matrix.
  156. .. image:: _static/img/hessian_response.png
  157. Function does not do any normalization or nms. The response map is computed according the following formulation:
  158. .. math::
  159. R = det(H)
  160. where:
  161. .. math::
  162. M = \sum_{(x,y) \in W}
  163. \begin{bmatrix}
  164. I_{xx} & I_{xy} \\
  165. I_{xy} & I_{yy} \\
  166. \end{bmatrix}
  167. Args:
  168. input: input image with shape :math:`(B, C, H, W)`.
  169. grads_mode: can be ``'sobel'`` for standalone use or ``'diff'`` for use on Gaussian pyramid.
  170. sigmas: coefficients to be multiplied by multichannel response. Should be shape of :math:`(B)`
  171. It is necessary for performing non-maxima-suppression across different scale pyramid levels.
  172. See `vlfeat <https://github.com/vlfeat/vlfeat/blob/master/vl/covdet.c#L874>`_.
  173. Return:
  174. the response map per channel with shape :math:`(B, C, H, W)`.
  175. Shape:
  176. - Input: :math:`(B, C, H, W)`
  177. - Output: :math:`(B, C, H, W)`
  178. Examples:
  179. >>> input = torch.tensor([[[
  180. ... [0., 0., 0., 0., 0., 0., 0.],
  181. ... [0., 1., 1., 1., 1., 1., 0.],
  182. ... [0., 1., 1., 1., 1., 1., 0.],
  183. ... [0., 1., 1., 1., 1., 1., 0.],
  184. ... [0., 1., 1., 1., 1., 1., 0.],
  185. ... [0., 1., 1., 1., 1., 1., 0.],
  186. ... [0., 0., 0., 0., 0., 0., 0.],
  187. ... ]]]) # 1x1x7x7
  188. >>> # compute the response map
  189. hessian_response(input)
  190. tensor([[[[0.0155, 0.0334, 0.0194, 0.0000, 0.0194, 0.0334, 0.0155],
  191. [0.0334, 0.0575, 0.0339, 0.0000, 0.0339, 0.0575, 0.0334],
  192. [0.0194, 0.0339, 0.0497, 0.0000, 0.0497, 0.0339, 0.0194],
  193. [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
  194. [0.0194, 0.0339, 0.0497, 0.0000, 0.0497, 0.0339, 0.0194],
  195. [0.0334, 0.0575, 0.0339, 0.0000, 0.0339, 0.0575, 0.0334],
  196. [0.0155, 0.0334, 0.0194, 0.0000, 0.0194, 0.0334, 0.0155]]]])
  197. """
  198. # TODO: Recompute doctest
  199. KORNIA_CHECK_SHAPE(input, ["B", "C", "H", "W"])
  200. if sigmas is not None:
  201. if not isinstance(sigmas, Tensor):
  202. raise TypeError(f"sigmas type is not a Tensor. Got {type(sigmas)}")
  203. if (not len(sigmas.shape) == 1) or (sigmas.size(0) != input.size(0)):
  204. raise ValueError(f"Invalid sigmas shape, we expect B == input.size(0). Got: {sigmas.shape}")
  205. gradients: Tensor = spatial_gradient(input, grads_mode, 2)
  206. dxx: Tensor = gradients[:, :, 0]
  207. dxy: Tensor = gradients[:, :, 1]
  208. dyy: Tensor = gradients[:, :, 2]
  209. scores: Tensor = dxx * dyy - dxy**2
  210. if sigmas is not None:
  211. scores = scores * sigmas.pow(4).view(-1, 1, 1, 1)
  212. return scores
  213. def dog_response(input: Tensor) -> Tensor:
  214. r"""Compute the Difference-of-Gaussian response.
  215. Args:
  216. input: a given the gaussian 5d tensor :math:`(B, C, D, H, W)`.
  217. Return:
  218. the response map per channel with shape :math:`(B, C, D-1, H, W)`.
  219. """
  220. KORNIA_CHECK_SHAPE(input, ["B", "C", "L", "H", "W"])
  221. return input[:, :, 1:] - input[:, :, :-1]
  222. def dog_response_single(input: Tensor, sigma1: float = 1.0, sigma2: float = 1.6) -> Tensor:
  223. r"""Compute the Difference-of-Gaussian response.
  224. .. image:: _static/img/dog_response_single.png
  225. Args:
  226. input: a given the gaussian 4d tensor :math:`(B, C, H, W)`.
  227. sigma1: lower gaussian sigma
  228. sigma2: bigger gaussian sigma
  229. Return:
  230. the response map per channel with shape :math:`(B, C, H, W)`.
  231. """
  232. KORNIA_CHECK_SHAPE(input, ["B", "C", "H", "W"])
  233. ks1 = _get_kernel_size(sigma1)
  234. ks2 = _get_kernel_size(sigma2)
  235. g1 = gaussian_blur2d(input, (ks1, ks1), (sigma1, sigma1))
  236. g2 = gaussian_blur2d(input, (ks2, ks2), (sigma2, sigma2))
  237. return g2 - g1
  238. class BlobDoG(Module):
  239. r"""Module that calculates Difference-of-Gaussians blobs.
  240. See
  241. :func: `~kornia.feature.dog_response` for details.
  242. """
  243. def __init__(self) -> None:
  244. super().__init__()
  245. def __repr__(self) -> str:
  246. return self.__class__.__name__
  247. def forward(self, input: Tensor, sigmas: Optional[Tensor] = None) -> Tensor:
  248. return dog_response(input)
  249. class BlobDoGSingle(Module):
  250. r"""Module that calculates Difference-of-Gaussians blobs.
  251. .. image:: _static/img/dog_response_single.png
  252. See :func:`~kornia.feature.dog_response_single` for details.
  253. """
  254. def __init__(self, sigma1: float = 1.0, sigma2: float = 1.6) -> None:
  255. super().__init__()
  256. self.sigma1 = sigma1
  257. self.sigma2 = sigma2
  258. def __repr__(self) -> str:
  259. return f"{self.__class__.__name__}, sigma1={self.sigma1}, sigma2={self.sigma2})"
  260. def forward(self, input: Tensor, sigmas: Optional[Tensor] = None) -> Tensor:
  261. return dog_response_single(input, self.sigma1, self.sigma2)
  262. class CornerHarris(Module):
  263. r"""Module that calculates Harris corners.
  264. .. image:: _static/img/harris_response.png
  265. See :func:`~kornia.feature.harris_response` for details.
  266. """
  267. k: Tensor
  268. def __init__(self, k: Union[float, Tensor], grads_mode: str = "sobel") -> None:
  269. super().__init__()
  270. if isinstance(k, float):
  271. self.register_buffer("k", tensor(k))
  272. else:
  273. self.register_buffer("k", k)
  274. self.grads_mode: str = grads_mode
  275. def __repr__(self) -> str:
  276. return f"{self.__class__.__name__}(k={self.k}, grads_mode={self.grads_mode})"
  277. def forward(self, input: Tensor, sigmas: Optional[Tensor] = None) -> Tensor:
  278. return harris_response(input, self.k, self.grads_mode, sigmas)
  279. class CornerGFTT(Module):
  280. r"""Module that calculates Shi-Tomasi corners.
  281. .. image:: _static/img/gftt_response.png
  282. See :func:`~kornia.feature.gftt_response` for details.
  283. """
  284. def __init__(self, grads_mode: str = "sobel") -> None:
  285. super().__init__()
  286. self.grads_mode: str = grads_mode
  287. def __repr__(self) -> str:
  288. return f"{self.__class__.__name__}(grads_mode={self.grads_mode})"
  289. def forward(self, input: Tensor, sigmas: Optional[Tensor] = None) -> Tensor:
  290. return gftt_response(input, self.grads_mode, sigmas)
  291. class BlobHessian(Module):
  292. r"""Module that calculates Hessian blobs.
  293. .. image:: _static/img/hessian_response.png
  294. See :func:`~kornia.feature.hessian_response` for details.
  295. """
  296. def __init__(self, grads_mode: str = "sobel") -> None:
  297. super().__init__()
  298. self.grads_mode: str = grads_mode
  299. def __repr__(self) -> str:
  300. return f"{self.__class__.__name__}(grads_mode={self.grads_mode})"
  301. def forward(self, input: Tensor, sigmas: Optional[Tensor] = None) -> Tensor:
  302. return hessian_response(input, self.grads_mode, sigmas)