| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216 |
- import warnings
- import cv2
- import numpy as np
- import torch
- from kornia.color import rgb_to_grayscale
- from packaging import version
- try:
- import pycolmap
- except ImportError:
- pycolmap = None
- from .utils import Extractor
- def filter_dog_point(points, scales, angles, image_shape, nms_radius, scores=None):
- h, w = image_shape
- ij = np.round(points - 0.5).astype(int).T[::-1]
- # Remove duplicate points (identical coordinates).
- # Pick highest scale or score
- s = scales if scores is None else scores
- buffer = np.zeros((h, w))
- np.maximum.at(buffer, tuple(ij), s)
- keep = np.where(buffer[tuple(ij)] == s)[0]
- # Pick lowest angle (arbitrary).
- ij = ij[:, keep]
- buffer[:] = np.inf
- o_abs = np.abs(angles[keep])
- np.minimum.at(buffer, tuple(ij), o_abs)
- mask = buffer[tuple(ij)] == o_abs
- ij = ij[:, mask]
- keep = keep[mask]
- if nms_radius > 0:
- # Apply NMS on the remaining points
- buffer[:] = 0
- buffer[tuple(ij)] = s[keep] # scores or scale
- local_max = torch.nn.functional.max_pool2d(
- torch.from_numpy(buffer).unsqueeze(0),
- kernel_size=nms_radius * 2 + 1,
- stride=1,
- padding=nms_radius,
- ).squeeze(0)
- is_local_max = buffer == local_max.numpy()
- keep = keep[is_local_max[tuple(ij)]]
- return keep
- def sift_to_rootsift(x: torch.Tensor, eps=1e-6) -> torch.Tensor:
- x = torch.nn.functional.normalize(x, p=1, dim=-1, eps=eps)
- x.clip_(min=eps).sqrt_()
- return torch.nn.functional.normalize(x, p=2, dim=-1, eps=eps)
- def run_opencv_sift(features: cv2.Feature2D, image: np.ndarray) -> np.ndarray:
- """
- Detect keypoints using OpenCV Detector.
- Optionally, perform description.
- Args:
- features: OpenCV based keypoints detector and descriptor
- image: Grayscale image of uint8 data type
- Returns:
- keypoints: 1D array of detected cv2.KeyPoint
- scores: 1D array of responses
- descriptors: 1D array of descriptors
- """
- detections, descriptors = features.detectAndCompute(image, None)
- points = np.array([k.pt for k in detections], dtype=np.float32)
- scores = np.array([k.response for k in detections], dtype=np.float32)
- scales = np.array([k.size for k in detections], dtype=np.float32)
- angles = np.deg2rad(np.array([k.angle for k in detections], dtype=np.float32))
- return points, scores, scales, angles, descriptors
- class SIFT(Extractor):
- default_conf = {
- "rootsift": True,
- "nms_radius": 0, # None to disable filtering entirely.
- "max_num_keypoints": 4096,
- "backend": "opencv", # in {opencv, pycolmap, pycolmap_cpu, pycolmap_cuda}
- "detection_threshold": 0.0066667, # from COLMAP
- "edge_threshold": 10,
- "first_octave": -1, # only used by pycolmap, the default of COLMAP
- "num_octaves": 4,
- }
- preprocess_conf = {
- "resize": 1024,
- }
- required_data_keys = ["image"]
- def __init__(self, **conf):
- super().__init__(**conf) # Update with default configuration.
- backend = self.conf.backend
- if backend.startswith("pycolmap"):
- if pycolmap is None:
- raise ImportError(
- "Cannot find module pycolmap: install it with pip"
- "or use backend=opencv."
- )
- options = {
- "peak_threshold": self.conf.detection_threshold,
- "edge_threshold": self.conf.edge_threshold,
- "first_octave": self.conf.first_octave,
- "num_octaves": self.conf.num_octaves,
- "normalization": pycolmap.Normalization.L2, # L1_ROOT is buggy.
- }
- device = (
- "auto" if backend == "pycolmap" else backend.replace("pycolmap_", "")
- )
- if (
- backend == "pycolmap_cpu" or not pycolmap.has_cuda
- ) and pycolmap.__version__ < "0.5.0":
- warnings.warn(
- "The pycolmap CPU SIFT is buggy in version < 0.5.0, "
- "consider upgrading pycolmap or use the CUDA version.",
- stacklevel=1,
- )
- else:
- options["max_num_features"] = self.conf.max_num_keypoints
- self.sift = pycolmap.Sift(options=options, device=device)
- elif backend == "opencv":
- self.sift = cv2.SIFT_create(
- contrastThreshold=self.conf.detection_threshold,
- nfeatures=self.conf.max_num_keypoints,
- edgeThreshold=self.conf.edge_threshold,
- nOctaveLayers=self.conf.num_octaves,
- )
- else:
- backends = {"opencv", "pycolmap", "pycolmap_cpu", "pycolmap_cuda"}
- raise ValueError(
- f"Unknown backend: {backend} not in " f"{{{','.join(backends)}}}."
- )
- def extract_single_image(self, image: torch.Tensor):
- image_np = image.cpu().numpy().squeeze(0)
- if self.conf.backend.startswith("pycolmap"):
- if version.parse(pycolmap.__version__) >= version.parse("0.5.0"):
- detections, descriptors = self.sift.extract(image_np)
- scores = None # Scores are not exposed by COLMAP anymore.
- else:
- detections, scores, descriptors = self.sift.extract(image_np)
- keypoints = detections[:, :2] # Keep only (x, y).
- scales, angles = detections[:, -2:].T
- if scores is not None and (
- self.conf.backend == "pycolmap_cpu" or not pycolmap.has_cuda
- ):
- # Set the scores as a combination of abs. response and scale.
- scores = np.abs(scores) * scales
- elif self.conf.backend == "opencv":
- # TODO: Check if opencv keypoints are already in corner convention
- keypoints, scores, scales, angles, descriptors = run_opencv_sift(
- self.sift, (image_np * 255.0).astype(np.uint8)
- )
- pred = {
- "keypoints": keypoints,
- "scales": scales,
- "oris": angles,
- "descriptors": descriptors,
- }
- if scores is not None:
- pred["keypoint_scores"] = scores
- # sometimes pycolmap returns points outside the image. We remove them
- if self.conf.backend.startswith("pycolmap"):
- is_inside = (
- pred["keypoints"] + 0.5 < np.array([image_np.shape[-2:][::-1]])
- ).all(-1)
- pred = {k: v[is_inside] for k, v in pred.items()}
- if self.conf.nms_radius is not None:
- keep = filter_dog_point(
- pred["keypoints"],
- pred["scales"],
- pred["oris"],
- image_np.shape,
- self.conf.nms_radius,
- scores=pred.get("keypoint_scores"),
- )
- pred = {k: v[keep] for k, v in pred.items()}
- pred = {k: torch.from_numpy(v) for k, v in pred.items()}
- if scores is not None:
- # Keep the k keypoints with highest score
- num_points = self.conf.max_num_keypoints
- if num_points is not None and len(pred["keypoints"]) > num_points:
- indices = torch.topk(pred["keypoint_scores"], num_points).indices
- pred = {k: v[indices] for k, v in pred.items()}
- return pred
- def forward(self, data: dict) -> dict:
- image = data["image"]
- if image.shape[1] == 3:
- image = rgb_to_grayscale(image)
- device = image.device
- image = image.cpu()
- pred = []
- for k in range(len(image)):
- img = image[k]
- if "image_size" in data.keys():
- # avoid extracting points in padded areas
- w, h = data["image_size"][k]
- img = img[:, :h, :w]
- p = self.extract_single_image(img)
- pred.append(p)
- pred = {k: torch.stack([p[k] for p in pred], 0).to(device) for k in pred[0]}
- if self.conf.rootsift:
- pred["descriptors"] = sift_to_rootsift(pred["descriptors"])
- return pred
|