base.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. # LICENSE HEADER MANAGED BY add-license-header
  2. #
  3. # Copyright 2018 Kornia Team
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. from __future__ import annotations
  18. from typing import Optional, Union
  19. from kornia.color.gray import grayscale_to_rgb
  20. from kornia.core import Tensor, tensor
  21. from kornia.core.external import PILImage as Image
  22. from kornia.models._hf_models import HFONNXComunnityModel
  23. __all__ = ["DepthEstimation"]
  24. class DepthEstimation(HFONNXComunnityModel):
  25. name: str = "depth_estimation"
  26. def __call__(self, images: Union[Tensor, list[Tensor]]) -> Union[Tensor, list[Tensor]]: # type: ignore[override]
  27. """Detect objects in a given list of images.
  28. Args:
  29. images: If list of RGB images. Each image is a Tensor with shape :math:`(3, H, W)`.
  30. If Tensor, a Tensor with shape :math:`(B, 3, H, W)`.
  31. Returns:
  32. list of detections found in each image. For item in a batch, shape is :math:`(D, 6)`, where :math:`D` is the
  33. number of detections in the given image, :math:`6` represents class id, score, and `xywh` bounding box.
  34. """
  35. if isinstance(
  36. images,
  37. (
  38. list,
  39. tuple,
  40. ),
  41. ):
  42. results = [super(DepthEstimation, self).__call__(image[None].cpu().numpy())[0] for image in images]
  43. results = [
  44. self.resize_back(tensor(result, device=image.device, dtype=image.dtype), image)
  45. for result, image in zip(results, images)
  46. ]
  47. return results
  48. result = super().__call__(images.cpu().numpy())[0]
  49. result = tensor(result, device=images.device, dtype=images.dtype)
  50. return self.resize_back(result, images)
  51. def visualize(
  52. self,
  53. images: Tensor,
  54. depth_maps: Optional[Union[Tensor, list[Tensor]]] = None,
  55. output_type: str = "torch",
  56. depth_type: str = "relative",
  57. max_depth: int = 80,
  58. ) -> Union[Tensor, list[Tensor], list[Image.Image]]: # type: ignore
  59. """Draw the segmentation results.
  60. Args:
  61. images: input tensor.
  62. depth_maps: estimated depths.
  63. output_type: type of the output.
  64. depth_type: 'metric' or 'relative' depth.
  65. max_depth: maximum depth value. Only valid for metric depth.
  66. Returns:
  67. output tensor.
  68. """
  69. if depth_maps is None:
  70. depth_maps = self(images)
  71. output = []
  72. for depth_map in depth_maps:
  73. if depth_type == "metric":
  74. depth_map = depth_map / max_depth
  75. elif depth_type == "relative":
  76. depth_map = depth_map / depth_map.max()
  77. else:
  78. raise ValueError(f"Unsupported depth type `{depth_type}`.")
  79. output.append(grayscale_to_rgb(depth_map))
  80. return self._tensor_to_type(output, output_type, is_batch=isinstance(images, Tensor))
  81. def save(
  82. self,
  83. images: Tensor,
  84. depth_maps: Optional[Union[Tensor, list[Tensor]]] = None,
  85. directory: Optional[str] = None,
  86. output_type: str = "torch",
  87. depth_type: str = "relative",
  88. max_depth: int = 80,
  89. ) -> None:
  90. """Save the segmentation results.
  91. Args:
  92. images: input tensor.
  93. depth_maps: estimated depths.
  94. output_type: type of the output.
  95. depth_type: 'metric' or 'relative' depth.
  96. max_depth: maximum depth value. Only valid for metric depth.
  97. directory: where to store outputs.
  98. Returns:
  99. output tensor.
  100. """
  101. outputs = self.visualize(images, depth_maps, output_type, depth_type=depth_type, max_depth=max_depth)
  102. self._save_outputs(images, directory, suffix="_src")
  103. self._save_outputs(outputs, directory, suffix="_depth")