structures.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. # LICENSE HEADER MANAGED BY add-license-header
  2. #
  3. # Copyright 2018 Kornia Team
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. from __future__ import annotations
  18. from dataclasses import dataclass
  19. from typing import Optional
  20. from kornia.core import Tensor
  21. from kornia.core.check import KORNIA_CHECK
  22. from kornia.geometry.transform import resize
  23. @dataclass
  24. class SegmentationResults:
  25. """Encapsulate the results obtained by a Segmentation model.
  26. Args:
  27. logits: Results logits with shape :math:`(B, C, H, W)`, where :math:`C` refers to the number of predicted masks
  28. scores: The scores from the logits. Shape :math:`(B, C)`
  29. mask_threshold: The threshold value to generate the `binary_masks` from the `logits`
  30. """
  31. logits: Tensor
  32. scores: Tensor
  33. mask_threshold: float = 0.0
  34. @property
  35. def binary_masks(self) -> Tensor:
  36. """Binary mask generated from logits considering the mask_threshold.
  37. Shape will be the same of logits :math:`(B, C, H, W)` where :math:`C` is the number masks predicted.
  38. .. note:: If you run `original_res_logits`, this will generate the masks
  39. based on the original resolution logits.
  40. Otherwise, this will use the low resolution logits (self.logits).
  41. """
  42. if self._original_res_logits is not None:
  43. x = self._original_res_logits
  44. else:
  45. x = self.logits
  46. return x > self.mask_threshold
  47. def original_res_logits(
  48. self, input_size: tuple[int, int], original_size: tuple[int, int], image_size_encoder: Optional[tuple[int, int]]
  49. ) -> Tensor:
  50. """Remove padding and upscale the logits to the original image size.
  51. Resize to image encoder input -> remove padding (bottom and right) -> Resize to original size
  52. .. note:: This method set a internal `original_res_logits` which will be used if available for the binary masks.
  53. Args:
  54. input_size: The size of the image input to the model, in (H, W) format. Used to remove padding.
  55. original_size: The original size of the image before resizing for input to the model, in (H, W) format.
  56. image_size_encoder: The size of the input image for image encoder, in (H, W) format. Used to resize the
  57. logits back to encoder resolution before remove the padding.
  58. Returns:
  59. Batched logits in :math:`(K, C, H, W)` format, where (H, W) is given by original_size.
  60. """
  61. x = self.logits
  62. if isinstance(image_size_encoder, tuple):
  63. x = resize(x, size=image_size_encoder, interpolation="bilinear", align_corners=False, antialias=False)
  64. x = x[..., : input_size[0], : input_size[1]]
  65. x = resize(x, size=original_size, interpolation="bilinear", align_corners=False, antialias=False)
  66. self._original_res_logits = x
  67. return self._original_res_logits
  68. def squeeze(self, dim: int = 0) -> SegmentationResults:
  69. """Realize a squeeze for the dim given for all properties."""
  70. self.logits = self.logits.squeeze(dim)
  71. self.scores = self.scores.squeeze(dim)
  72. if isinstance(self._original_res_logits, Tensor):
  73. self._original_res_logits = self._original_res_logits.squeeze(dim)
  74. return self
  75. @dataclass
  76. class Prompts:
  77. """Encapsulate the prompts inputs for a Model.
  78. Args:
  79. points: A tuple with the keypoints (coordinates x, y) and their respective labels. Shape :math:`(K, N, 2)` for
  80. the keypoints, and :math:`(K, N)`
  81. boxes: Batched box inputs, with shape :math:`(K, 4)`. Expected to be into xyxy format.
  82. masks: Batched mask prompts to the model with shape :math:`(K, 1, H, W)`
  83. """
  84. points: Optional[tuple[Tensor, Tensor]] = None
  85. boxes: Optional[Tensor] = None
  86. masks: Optional[Tensor] = None
  87. def __post_init__(self) -> None:
  88. if isinstance(self.keypoints, Tensor) and isinstance(self.boxes, Tensor):
  89. KORNIA_CHECK(self.keypoints.shape[0] == self.boxes.shape[0], "The prompts should have the same batch size!")
  90. @property
  91. def keypoints(self) -> Optional[Tensor]:
  92. """The keypoints from the `points`."""
  93. return self.points[0] if isinstance(self.points, tuple) else None
  94. @property
  95. def keypoints_labels(self) -> Optional[Tensor]:
  96. """The keypoints labels from the `points`."""
  97. return self.points[1] if isinstance(self.points, tuple) else None