rtdetr.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. # LICENSE HEADER MANAGED BY add-license-header
  2. #
  3. # Copyright 2018 Kornia Team
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. import warnings
  18. from typing import Optional
  19. import torch
  20. from kornia.contrib.models.rt_detr import DETRPostProcessor
  21. from kornia.contrib.models.rt_detr.model import RTDETR, RTDETRConfig
  22. from kornia.models.detection.base import ObjectDetector
  23. from kornia.models.utils import ResizePreProcessor
  24. __all__ = ["RTDETRDetectorBuilder"]
  25. class RTDETRDetectorBuilder:
  26. """A builder class for constructing RT-DETR object detection models.
  27. This class provides static methods to:
  28. - Build an object detection model from a model name or configuration.
  29. - Export the model to ONNX format for inference.
  30. .. code-block:: python
  31. images = kornia.utils.sample.get_sample_images()
  32. model = RTDETRDetectorBuilder.build()
  33. model.save(images)
  34. """
  35. @staticmethod
  36. def build(
  37. model_name: Optional[str] = None,
  38. config: Optional[RTDETRConfig] = None,
  39. pretrained: bool = True,
  40. image_size: Optional[int] = None,
  41. confidence_threshold: Optional[float] = None,
  42. confidence_filtering: Optional[bool] = None,
  43. ) -> ObjectDetector:
  44. """Build and returns an RT-DETR object detector model.
  45. Either `model_name` or `config` must be provided. If neither is provided,
  46. a default pretrained model (`rtdetr_r18vd`) will be built.
  47. Args:
  48. model_name:
  49. Name of the RT-DETR model to load. Can be one of the available pretrained models.
  50. Including 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'.
  51. config:
  52. A custom configuration object for building the RT-DETR model.
  53. pretrained:
  54. Whether to load a pretrained version of the model (applies when `model_name` is provided).
  55. image_size:
  56. The size to which input images will be resized during preprocessing.
  57. If None, no resizing will be inferred from config file. Recommended scales include
  58. [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800].
  59. confidence_threshold: Threshold to filter results based on confidence scores.
  60. confidence_filtering: Whether to filter results based on confidence scores.
  61. Returns:
  62. ObjectDetector
  63. An object detector instance initialized with the specified model, preprocessor, and post-processor.
  64. """
  65. if model_name is not None and config is not None:
  66. raise ValueError("Either `model_name` or `config` should be `None`.")
  67. if config is not None:
  68. model = RTDETR.from_config(config)
  69. image_size = image_size or config.input_size
  70. elif model_name is not None:
  71. if pretrained:
  72. model = RTDETR.from_pretrained(model_name)
  73. image_size = RTDETRConfig.from_name(model_name).input_size
  74. else:
  75. model = RTDETR.from_name(model_name)
  76. image_size = RTDETRConfig.from_name(model_name).input_size
  77. else:
  78. warnings.warn("No `model_name` or `config` found. Will build pretrained `rtdetr_r18vd`.", stacklevel=1)
  79. model = RTDETR.from_pretrained("rtdetr_r18vd")
  80. image_size = RTDETRConfig.from_name("rtdetr_r18vd").input_size
  81. if confidence_threshold is None:
  82. confidence_threshold = config.confidence_threshold if config is not None else 0.3
  83. return ObjectDetector(
  84. model,
  85. ResizePreProcessor(image_size, image_size),
  86. DETRPostProcessor(
  87. confidence_threshold=confidence_threshold,
  88. confidence_filtering=confidence_filtering or not torch.onnx.is_in_onnx_export(),
  89. num_classes=model.decoder.num_classes,
  90. num_top_queries=model.decoder.num_queries,
  91. ),
  92. )