distance_transform.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. # LICENSE HEADER MANAGED BY add-license-header
  2. #
  3. # Copyright 2018 Kornia Team
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. import math
  18. import torch
  19. from torch import nn
  20. from kornia.filters import filter2d
  21. from kornia.utils import create_meshgrid
  22. def distance_transform(image: torch.Tensor, kernel_size: int = 3, h: float = 0.35) -> torch.Tensor:
  23. r"""Approximates the Manhattan distance transform of images using cascaded convolution operations.
  24. The value at each pixel in the output represents the distance to the nearest non-zero pixel in the image image.
  25. It uses the method described in :cite:`pham2021dtlayer`.
  26. The transformation is applied independently across the channel dimension of the images.
  27. Args:
  28. image: Image with shape :math:`(B,C,H,W)`.
  29. kernel_size: size of the convolution kernel.
  30. h: value that influence the approximation of the min function.
  31. Returns:
  32. tensor with shape :math:`(B,C,H,W)`.
  33. Example:
  34. >>> tensor = torch.zeros(1, 1, 5, 5)
  35. >>> tensor[:,:, 1, 2] = 1
  36. >>> dt = kornia.contrib.distance_transform(tensor)
  37. """
  38. if not isinstance(image, torch.Tensor):
  39. raise TypeError(f"image type is not a torch.Tensor. Got {type(image)}")
  40. if not len(image.shape) == 4:
  41. raise ValueError(f"Invalid image shape, we expect BxCxHxW. Got: {image.shape}")
  42. if kernel_size % 2 == 0:
  43. raise ValueError("Kernel size must be an odd number.")
  44. # n_iters is set such that the DT will be able to propagate from any corner of the image to its far,
  45. # diagonally opposite corner
  46. n_iters: int = math.ceil(max(image.shape[2], image.shape[3]) / math.floor(kernel_size / 2))
  47. grid = create_meshgrid(
  48. kernel_size, kernel_size, normalized_coordinates=False, device=image.device, dtype=image.dtype
  49. )
  50. grid -= math.floor(kernel_size / 2)
  51. kernel = torch.hypot(grid[0, :, :, 0], grid[0, :, :, 1])
  52. kernel = torch.exp(kernel / -h).unsqueeze(0)
  53. out = torch.zeros_like(image)
  54. # It is possible to avoid cloning the image if boundary = image, but this would require modifying the image tensor.
  55. boundary = image.clone()
  56. signal_ones = torch.ones_like(boundary)
  57. for i in range(n_iters):
  58. cdt = filter2d(boundary, kernel, border_type="replicate")
  59. cdt = -h * torch.log(cdt)
  60. # We are calculating log(0) above.
  61. cdt = torch.nan_to_num(cdt, posinf=0.0)
  62. mask = torch.where(cdt > 0, 1.0, 0.0)
  63. if mask.sum() == 0:
  64. break
  65. offset: int = i * (kernel_size // 2)
  66. out += (offset + cdt) * mask
  67. boundary = torch.where(mask == 1, signal_ones, boundary)
  68. return out
  69. class DistanceTransform(nn.Module):
  70. r"""Module that approximates the Manhattan (city block) distance transform of images using convolutions.
  71. Args:
  72. kernel_size: size of the convolution kernel.
  73. h: value that influence the approximation of the min function.
  74. """
  75. def __init__(self, kernel_size: int = 3, h: float = 0.35) -> None:
  76. super().__init__()
  77. self.kernel_size = kernel_size
  78. self.h = h
  79. def forward(self, image: torch.Tensor) -> torch.Tensor:
  80. # If images have multiple channels, view the channels in the batch dimension to match kernel shape.
  81. if image.shape[1] > 1:
  82. image_in = image.view(-1, 1, image.shape[-2], image.shape[-1])
  83. else:
  84. image_in = image
  85. return distance_transform(image_in, self.kernel_size, self.h).view_as(image)