# LICENSE HEADER MANAGED BY add-license-header
#
# Copyright 2018 Kornia Team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import math
from typing import Tuple

import torch
from torch import nn

from kornia.core import Module, Tensor, concatenate, eye, normalize
from kornia.core.check import KORNIA_CHECK_SHAPE
from kornia.filters import get_gaussian_kernel2d, spatial_gradient
from kornia.geometry.conversions import pi


def _get_reshape_kernel(kd: int, ky: int, kx: int) -> Tensor:
    """Return neigh2channels conv kernel."""
    numel: int = kd * ky * kx

    # Fast-path: use static _eye_cache if available for small numel
    # (to avoid repeated allocations for common kernel sizes)
    # The cache size is limited for memory efficiency.
    # NOTE: If memory is a concern and large kd/ky/kx are rare, adjust _MAX_CACHED.
    _MAX_CACHED = 4096
    if numel <= _MAX_CACHED:
        if not hasattr(_get_reshape_kernel, "_eye_cache"):
            _get_reshape_kernel._eye_cache = {}  # type: ignore[attr-defined]
        cache = _get_reshape_kernel._eye_cache  # type: ignore[attr-defined]
        res = cache.get(numel)
        if res is None:
            res = eye(numel)
            cache[numel] = res
        return res.view(numel, kd, ky, kx)
    else:
        # fallback to normal allocation for big kernels
        return eye(numel).view(numel, kd, ky, kx)


def get_sift_pooling_kernel(ksize: int = 25) -> Tensor:
    r"""Return a weighted pooling kernel for SIFT descriptor.

    The kernel is separable: a 1D profile that decreases linearly with the
    distance from the window centre is built first, and the 2D kernel is the
    outer product of that profile with itself divided by :math:`(ksize / 2)^2`.

    Args:
        ksize: kernel_size.

    Returns:
        the pooling kernel with shape :math:`(ksize, ksize)`.

    """
    half: float = float(ksize) / 2.0
    # Sample centres sit at i + 0.5; the weight is `half` minus their distance
    # to the window centre, i.e. it falls off linearly towards both borders.
    profile = half - (torch.arange(ksize).float() + 0.5 - half).abs()
    # torch.outer is the supported spelling of the deprecated torch.ger alias.
    return torch.outer(profile, profile) / (half**2)


def get_sift_bin_ksize_stride_pad(patch_size: int, num_spatial_bins: int) -> Tuple[int, int, int]:
    r"""Return a tuple with SIFT parameters.

    The kernel size, stride and padding are derived from the patch size so that
    a strided pooling over the patch yields ``num_spatial_bins`` outputs per
    axis; the result is validated with the usual convolution output-size formula.

    Args:
        patch_size: the given patch size.
        num_spatial_bins: the given number of spatial bins.

    Returns:
        ksize, stride, pad.

    Raises:
        ValueError: if ``num_spatial_bins`` is not positive, or if the derived
            parameters do not produce exactly ``num_spatial_bins`` bins per axis
            (typically because the patch is too small).

    """
    if num_spatial_bins < 1:
        raise ValueError(f"num_spatial_bins must be a positive integer. Got {num_spatial_bins}.")

    ksize: int = 2 * int(patch_size / (num_spatial_bins + 1))
    stride: int = patch_size // num_spatial_bins
    pad: int = ksize // 4

    # A non-positive stride (patch smaller than the number of bins, or a
    # negative patch size) can never give a valid pooling; treat it as zero
    # output bins so it is reported through the ValueError below instead of a
    # ZeroDivisionError or a bogus negative-parameter result.
    out_size: int = 0
    if stride > 0:
        out_size = (patch_size + 2 * pad - (ksize - 1) - 1) // stride + 1

    if out_size != num_spatial_bins:
        raise ValueError(
            f"Patch size {patch_size} is incompatible with requested number of spatial bins"
            f" {num_spatial_bins} for SIFT descriptor. Usually it happens when patch size is too small"
            " for num_spatial_bins specified"
        )
    return ksize, stride, pad


class SIFTDescriptor(Module):
    r"""Module which computes SIFT descriptors of given patches.

    Gradient magnitudes are weighted by a Gaussian window, softly assigned to the
    two nearest orientation bins, pooled into a ``num_spatial_bins x num_spatial_bins``
    grid and finally L2-normalised, clipped and re-normalised.

    Args:
        patch_size: Input patch size in pixels.
        num_ang_bins: Number of angular bins.
        num_spatial_bins: Number of spatial bins.
        rootsift: if ``True``, RootSIFT (Arandjelović et. al, 2012) is computed.
        clipval: clipping value to reduce single-bin dominance

    Returns:
        SIFT descriptor of the patches.

    Shape:
        - Input: :math:`(B, 1, \text{patch_size}, \text{patch_size})`
        - Output: :math:`(B, \text{num_ang_bins * num_spatial_bins ** 2})`

    Example:
        >>> input = torch.rand(23, 1, 32, 32)
        >>> SIFT = SIFTDescriptor(32, 8, 4)
        >>> descs = SIFT(input) # 23x128

    """

    def __init__(
        self,
        patch_size: int = 41,
        num_ang_bins: int = 8,
        num_spatial_bins: int = 4,
        rootsift: bool = True,
        clipval: float = 0.2,
    ) -> None:
        super().__init__()
        self.eps = 1e-10
        self.num_ang_bins = num_ang_bins
        self.num_spatial_bins = num_spatial_bins
        self.clipval = clipval
        self.rootsift = rootsift
        self.patch_size = patch_size

        # Gaussian window covering the whole patch, used to weight the gradient magnitude.
        window_sigma: float = float(patch_size) / math.sqrt(2.0)
        self.gk = get_gaussian_kernel2d((patch_size, patch_size), (window_sigma, window_sigma), True)

        # Pooling geometry; raises if the patch cannot be split into the requested bins.
        bin_params = get_sift_bin_ksize_stride_pad(patch_size, num_spatial_bins)
        self.bin_ksize, self.bin_stride, self.pad = bin_params

        # The spatial pooling is a single-channel convolution with fixed weights.
        pool_weights = get_sift_pooling_kernel(ksize=self.bin_ksize).float()
        kernel_h, kernel_w = pool_weights.size(0), pool_weights.size(1)
        self.pk = nn.Conv2d(
            1,
            1,
            kernel_size=(kernel_h, kernel_w),
            stride=(self.bin_stride, self.bin_stride),
            padding=(self.pad, self.pad),
            bias=False,
        )
        self.pk.weight.data.copy_(pool_weights.reshape(1, 1, kernel_h, kernel_w))

    def get_pooling_kernel(self) -> Tensor:
        """Return the (detached) weight of the spatial pooling convolution."""
        return self.pk.weight.detach()

    def get_weighting_kernel(self) -> Tensor:
        """Return the (detached) Gaussian window applied to the gradient magnitude."""
        return self.gk.detach()

    def forward(self, input: Tensor) -> Tensor:
        """Compute one descriptor per input patch.

        Args:
            input: patches with shape ``(B, 1, patch_size, patch_size)``.

        Returns:
            descriptors with shape ``(B, num_ang_bins * num_spatial_bins ** 2)``.
        """
        side = f"{self.patch_size}"
        KORNIA_CHECK_SHAPE(input, ["B", "1", side, side])
        batch_size: int = input.shape[0]
        # Keep the pooling convolution on the dtype/device of the incoming patches.
        self.pk = self.pk.to(input.dtype).to(input.device)

        # The two derivative maps are read from dim 2 of the gradient tensor.
        gradient = spatial_gradient(input, "diff")
        dx = gradient[:, :, 0]
        dy = gradient[:, :, 1]

        magnitude = torch.sqrt(dx * dx + dy * dy + self.eps)
        # atan2 lies in [-pi, pi]; the 2*pi shift makes the angle positive before binning.
        angle = torch.atan2(dy, dx + self.eps) + 2.0 * pi
        magnitude = magnitude * self.gk.expand_as(magnitude).type_as(magnitude).to(magnitude.device)
        bin_position = float(self.num_ang_bins) * angle / (2.0 * pi)

        # Linear interpolation between the two neighbouring orientation bins.
        floor_position = torch.floor(bin_position)
        upper_fraction = bin_position - floor_position
        lower_bin = floor_position % self.num_ang_bins
        upper_bin = (lower_bin + 1) % self.num_ang_bins
        lower_vote = (1.0 - upper_fraction) * magnitude
        upper_vote = upper_fraction * magnitude

        pooled_per_bin = []
        for bin_idx in range(self.num_ang_bins):
            votes = (lower_bin == bin_idx).to(input.dtype) * lower_vote + (upper_bin == bin_idx).to(
                input.dtype
            ) * upper_vote
            pooled_per_bin.append(self.pk(votes))

        descriptor = concatenate(pooled_per_bin, 1).view(batch_size, -1)
        # L2-normalise, clip dominant bins, then L2-normalise again.
        descriptor = normalize(descriptor, p=2)
        descriptor = torch.clamp(descriptor, 0.0, float(self.clipval))
        descriptor = normalize(descriptor, p=2)
        if not self.rootsift:
            return descriptor
        # RootSIFT: L1-normalise and take the element-wise square root.
        return torch.sqrt(normalize(descriptor, p=1) + self.eps)

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}("
            f"num_ang_bins={self.num_ang_bins}, "
            f"num_spatial_bins={self.num_spatial_bins}, "
            f"patch_size={self.patch_size}, "
            f"rootsift={self.rootsift}, "
            f"clipval={self.clipval})"
        )


def sift_describe(
    input: Tensor,
    patch_size: int = 41,
    num_ang_bins: int = 8,
    num_spatial_bins: int = 4,
    rootsift: bool = True,
    clipval: float = 0.2,
) -> Tensor:
    r"""Compute the sift descriptor.

    Functional shortcut that builds a :class:`~kornia.feature.SIFTDescriptor`
    with the given settings and applies it to ``input`` once. See that class
    for details on the arguments and the returned tensor.
    """
    descriptor = SIFTDescriptor(
        patch_size=patch_size,
        num_ang_bins=num_ang_bins,
        num_spatial_bins=num_spatial_bins,
        rootsift=rootsift,
        clipval=clipval,
    )
    return descriptor(input)


class DenseSIFTDescriptor(Module):
    """Module, which computes SIFT descriptor densely over the image.

    Gradient magnitudes are softly assigned to orientation bins, each orientation
    map is smoothed by a fixed pooling convolution, and a second fixed convolution
    gathers every ``num_spatial_bins x num_spatial_bins`` neighbourhood of the
    orientation maps into the channel dimension.

    Args:
        num_ang_bins: Number of angular bins. (8 is default)
        num_spatial_bins: Number of spatial bins per descriptor (4 is default).
            You might want to set odd number and relevant padding to keep feature map size
        spatial_bin_size: Size of a spatial bin in pixels (4 is default)
        rootsift: (bool) if True, RootSIFT (Arandjelović et. al, 2012) is computed
        clipval: clipping value to reduce single-bin dominance
        stride: default 1
        padding: default 1

    Returns:
        Tensor: DenseSIFT descriptor of the image

    Shape:
        - Input: (B, 1, H, W)
        - Output: (B, num_ang_bins * num_spatial_bins ** 2, H_out, W_out), where the spatial
          size depends on ``spatial_bin_size``, ``num_spatial_bins``, ``stride`` and ``padding``.

    Examples::
        >>> input =  torch.rand(2, 1, 200, 300)
        >>> SIFT = DenseSIFTDescriptor()
        >>> descs = SIFT(input) # (2, 128, H_out, W_out)

    """

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}("
            f"num_ang_bins={self.num_ang_bins}, "
            f"num_spatial_bins={self.num_spatial_bins}, "
            f"spatial_bin_size={self.spatial_bin_size}, "
            f"rootsift={self.rootsift}, "
            f"stride={self.stride}, "
            f"clipval={self.clipval})"
        )

    def __init__(
        self,
        num_ang_bins: int = 8,
        num_spatial_bins: int = 4,
        spatial_bin_size: int = 4,
        rootsift: bool = True,
        clipval: float = 0.2,
        stride: int = 1,
        padding: int = 1,
    ) -> None:
        super().__init__()
        self.eps = 1e-10
        self.num_ang_bins = num_ang_bins
        self.num_spatial_bins = num_spatial_bins
        self.spatial_bin_size = spatial_bin_size
        self.clipval = clipval
        self.rootsift = rootsift
        self.stride = stride
        self.pad = padding

        # Fixed-weight smoothing applied to every orientation map. The buffer keeps
        # the initial weights under the same state-dict key as before.
        nw = get_sift_pooling_kernel(ksize=self.spatial_bin_size).float()
        self.register_buffer("_bin_pooling_kernel_weight", nw.reshape(1, 1, nw.size(0), nw.size(1)))
        self.bin_pooling_kernel = nn.Conv2d(
            1,
            1,
            kernel_size=(nw.size(0), nw.size(1)),
            stride=(1, 1),
            bias=False,
            padding=(nw.size(0) // 2, nw.size(1) // 2),
        )
        self.bin_pooling_kernel.weight.data.copy_(self._bin_pooling_kernel_weight)

        # Identity-like kernel that moves each spatial neighbourhood of the
        # orientation maps into channels. It is cloned so the registered buffer
        # owns its storage and never aliases a tensor the helper may share
        # between calls.
        Pw = _get_reshape_kernel(num_ang_bins, num_spatial_bins, num_spatial_bins).float().clone()
        self.register_buffer("_poolingconv_weight", Pw)
        self.PoolingConv = nn.Conv2d(
            num_ang_bins,
            num_ang_bins * num_spatial_bins**2,
            kernel_size=(num_spatial_bins, num_spatial_bins),
            stride=(self.stride, self.stride),
            bias=False,
            padding=(self.pad, self.pad),
        )
        self.PoolingConv.weight.data.copy_(self._poolingconv_weight)

    def get_pooling_kernel(self) -> Tensor:
        """Return the (detached) weight currently used by the bin pooling convolution.

        The live convolution weight is returned rather than a construction-time
        snapshot, so the result follows dtype/device moves and weight loading.
        """
        return self.bin_pooling_kernel.weight.detach()

    def forward(self, input: Tensor) -> Tensor:
        """Compute the dense descriptor map.

        Args:
            input: image batch with shape ``(B, 1, H, W)``.

        Returns:
            descriptor map with ``num_ang_bins * num_spatial_bins ** 2`` channels.
        """
        KORNIA_CHECK_SHAPE(input, ["B", "1", "H", "W"])

        # Keep both fixed convolutions on the dtype/device of the incoming image.
        self.bin_pooling_kernel = self.bin_pooling_kernel.to(input.dtype).to(input.device)
        self.PoolingConv = self.PoolingConv.to(input.dtype).to(input.device)

        grads = spatial_gradient(input, "diff")
        # unpack the edges (the two derivative maps are read from dim 2)
        gx = grads[:, :, 0]
        gy = grads[:, :, 1]
        mag = torch.sqrt(gx * gx + gy * gy + self.eps)
        # atan2 lies in [-pi, pi]; the 2*pi shift makes the angle positive before binning.
        ori = torch.atan2(gy, gx + self.eps) + 2.0 * pi
        o_big = float(self.num_ang_bins) * ori / (2.0 * pi)

        # Linear interpolation between the two neighbouring orientation bins.
        bo0_big_ = torch.floor(o_big)
        wo1_big_ = o_big - bo0_big_
        bo0_big = bo0_big_ % self.num_ang_bins
        bo1_big = (bo0_big + 1) % self.num_ang_bins
        wo0_big = (1.0 - wo1_big_) * mag
        wo1_big = wo1_big_ * mag
        ang_bins = concatenate(
            [
                self.bin_pooling_kernel(
                    (bo0_big == i).to(input.dtype) * wo0_big + (bo1_big == i).to(input.dtype) * wo1_big
                )
                for i in range(self.num_ang_bins)
            ],
            1,
        )

        out_no_norm = self.PoolingConv(ang_bins)
        # L2-normalise over channels, clip dominant bins, then L2-normalise again.
        out = normalize(out_no_norm, dim=1, p=2).clamp_(0, float(self.clipval))
        out = normalize(out, dim=1, p=2)
        if self.rootsift:
            # RootSIFT: L1-normalise and take the element-wise square root.
            out = torch.sqrt(normalize(out, p=1) + self.eps)
        return out