hardnet.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. # LICENSE HEADER MANAGED BY add-license-header
  2. #
  3. # Copyright 2018 Kornia Team
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. from typing import Dict
  18. import torch
  19. import torch.nn.functional as F
  20. from torch import nn
  21. from kornia.core.check import KORNIA_CHECK_SHAPE
  22. from kornia.utils.helpers import is_mps_tensor_safe
  23. urls: Dict[str, str] = {}
  24. urls["hardnet++"] = "https://github.com/DagnyT/hardnet/raw/master/pretrained/pretrained_all_datasets/HardNet++.pth"
  25. urls["liberty_aug"] = (
  26. "https://github.com/DagnyT/hardnet/raw/master/pretrained/train_liberty_with_aug/checkpoint_liberty_with_aug.pth"
  27. )
  28. urls["hardnet8v2"] = "http://cmp.felk.cvut.cz/~mishkdmy/hardnet8v2.pt"
  29. class HardNet(nn.Module):
  30. r"""Module, which computes HardNet descriptors of given grayscale patches of 32x32.
  31. This is based on the original code from paper "Working hard to know your neighbor's
  32. margins: Local descriptor learning loss". See :cite:`HardNet2017` for more details.
  33. Args:
  34. pretrained: Download and set pretrained weights to the model.
  35. Returns:
  36. torch.Tensor: HardNet descriptor of the patches.
  37. Shape:
  38. - Input: :math:`(B, 1, 32, 32)`
  39. - Output: :math:`(B, 128)`
  40. Examples:
  41. >>> input = torch.rand(16, 1, 32, 32)
  42. >>> hardnet = HardNet()
  43. >>> descs = hardnet(input) # 16x128
  44. """
  45. patch_size = 32
  46. def __init__(self, pretrained: bool = False) -> None:
  47. super().__init__()
  48. self.features = nn.Sequential(
  49. nn.Conv2d(1, 32, kernel_size=3, padding=1, bias=False),
  50. nn.BatchNorm2d(32, affine=False),
  51. nn.ReLU(),
  52. nn.Conv2d(32, 32, kernel_size=3, padding=1, bias=False),
  53. nn.BatchNorm2d(32, affine=False),
  54. nn.ReLU(),
  55. nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1, bias=False),
  56. nn.BatchNorm2d(64, affine=False),
  57. nn.ReLU(),
  58. nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False),
  59. nn.BatchNorm2d(64, affine=False),
  60. nn.ReLU(),
  61. nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1, bias=False),
  62. nn.BatchNorm2d(128, affine=False),
  63. nn.ReLU(),
  64. nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
  65. nn.BatchNorm2d(128, affine=False),
  66. nn.ReLU(),
  67. nn.Dropout(0.3),
  68. nn.Conv2d(128, 128, kernel_size=8, bias=False),
  69. nn.BatchNorm2d(128, affine=False),
  70. )
  71. # use torch.hub to load pretrained model
  72. if pretrained:
  73. pretrained_dict = torch.hub.load_state_dict_from_url(urls["liberty_aug"], map_location=torch.device("cpu"))
  74. self.load_state_dict(pretrained_dict["state_dict"], strict=True)
  75. self.eval()
  76. @staticmethod
  77. def _normalize_input(x: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
  78. """Normalize the input by batch."""
  79. if not is_mps_tensor_safe(x):
  80. sp, mp = torch.std_mean(x, dim=(-3, -2, -1), keepdim=True)
  81. else:
  82. mp = torch.mean(x, dim=(-3, -2, -1), keepdim=True)
  83. sp = torch.std(x, dim=(-3, -2, -1), keepdim=True)
  84. # WARNING: we need to .detach() input, otherwise the gradients produced by
  85. # the patches extractor with F.grid_sample are very noisy, making the detector
  86. # training totally unstable.
  87. return (x - mp.detach()) / (sp.detach() + eps)
  88. def forward(self, input: torch.Tensor) -> torch.Tensor:
  89. KORNIA_CHECK_SHAPE(input, ["B", "1", "32", "32"])
  90. x_norm: torch.Tensor = self._normalize_input(input)
  91. x_features: torch.Tensor = self.features(x_norm)
  92. x_out = x_features.view(x_features.size(0), -1)
  93. return F.normalize(x_out, dim=1)
  94. class HardNet8(nn.Module):
  95. r"""Module, which computes HardNet8 descriptors of given grayscale patches of 32x32.
  96. This is based on the original code from paper "Improving the HardNet Descriptor".
  97. See :cite:`HardNet2020` for more details.
  98. Args:
  99. pretrained: Download and set pretrained weights to the model.
  100. Returns:
  101. torch.Tensor: HardNet8 descriptor of the patches.
  102. Shape:
  103. - Input: :math:`(B, 1, 32, 32)`
  104. - Output: :math:`(B, 128)`
  105. Examples:
  106. >>> input = torch.rand(16, 1, 32, 32)
  107. >>> hardnet = HardNet8()
  108. >>> descs = hardnet(input) # 16x128
  109. """
  110. patch_size = 32
  111. def __init__(self, pretrained: bool = False) -> None:
  112. super().__init__()
  113. self.features = nn.Sequential(
  114. nn.Conv2d(1, 32, kernel_size=3, padding=1, bias=False),
  115. nn.BatchNorm2d(32, affine=False),
  116. nn.ReLU(),
  117. nn.Conv2d(32, 32, kernel_size=3, padding=1, bias=False),
  118. nn.BatchNorm2d(32, affine=False),
  119. nn.ReLU(),
  120. nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1, bias=False),
  121. nn.BatchNorm2d(64, affine=False),
  122. nn.ReLU(),
  123. nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False),
  124. nn.BatchNorm2d(64, affine=False),
  125. nn.ReLU(),
  126. nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1, bias=False),
  127. nn.BatchNorm2d(128, affine=False),
  128. nn.ReLU(),
  129. nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
  130. nn.BatchNorm2d(128, affine=False),
  131. nn.ReLU(),
  132. nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=False),
  133. nn.BatchNorm2d(256, affine=False),
  134. nn.ReLU(),
  135. nn.Dropout(0.3),
  136. nn.Conv2d(256, 512, kernel_size=8, bias=False),
  137. nn.BatchNorm2d(512, affine=False),
  138. )
  139. self.features.apply(self.weights_init)
  140. self.register_buffer("components", torch.ones(512, 128, dtype=torch.float))
  141. self.register_buffer("mean", torch.zeros(512, dtype=torch.float))
  142. # use torch.hub to load pretrained model
  143. if pretrained:
  144. pretrained_dict = torch.hub.load_state_dict_from_url(urls["hardnet8v2"], map_location=torch.device("cpu"))
  145. self.load_state_dict(pretrained_dict, strict=True)
  146. self.eval()
  147. @staticmethod
  148. def weights_init(m: object) -> None:
  149. if isinstance(m, nn.Conv2d):
  150. nn.init.orthogonal_(m.weight.data, gain=0.6)
  151. if m.bias is not None:
  152. nn.init.constant_(m.bias.data, 0.01)
  153. @staticmethod
  154. def _normalize_input(x: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
  155. """Normalize the input by batch."""
  156. if not is_mps_tensor_safe(x):
  157. sp, mp = torch.std_mean(x, dim=(-3, -2, -1), keepdim=True)
  158. else:
  159. mp = torch.mean(x, dim=(-3, -2, -1), keepdim=True)
  160. sp = torch.std(x, dim=(-3, -2, -1), keepdim=True)
  161. # WARNING: we need to .detach() input, otherwise the gradients produced by
  162. # the patches extractor with F.grid_sample are very noisy, making the detector
  163. # training totally unstable.
  164. return (x - mp.detach()) / (sp.detach() + eps)
  165. def forward(self, input: torch.Tensor) -> torch.Tensor:
  166. KORNIA_CHECK_SHAPE(input, ["B", "1", "32", "32"])
  167. x_norm: torch.Tensor = self._normalize_input(input)
  168. x_features: torch.Tensor = self.features(x_norm)
  169. mean: torch.Tensor = torch.jit.annotate(torch.Tensor, self.mean)
  170. components: torch.Tensor = torch.jit.annotate(torch.Tensor, self.components)
  171. x_prePCA = F.normalize(x_features.view(x_features.size(0), -1))
  172. pca = torch.mm(x_prePCA - mean, components)
  173. return F.normalize(pca, dim=1)