positional_encoder.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. # LICENSE HEADER MANAGED BY add-license-header
  2. #
  3. # Copyright 2018 Kornia Team
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. from functools import partial
  18. import torch
  19. from torch import nn
  20. from kornia.core import Tensor
  21. def _torch_sin(x: Tensor, freq: Tensor) -> Tensor:
  22. return (x * freq).sin() # FIXME: PI?
  23. def _torch_cos(x: Tensor, freq: Tensor) -> Tensor:
  24. return (x * freq).cos()
  25. class PositionalEncoder(nn.Module):
  26. """Sine-cosine positional encoder for input points."""
  27. def __init__(self, num_dims: int, num_freqs: int, log_space: bool = False) -> None:
  28. """Initialize positional encoder.
  29. Args:
  30. num_dims: Number of input dimensions (channels): int
  31. num_freqs: Number of frequency bands for encoding span: int
  32. log_space: Whether frequency sampling should be log spaced: bool
  33. """
  34. super().__init__()
  35. self._num_dims = num_dims
  36. self._embed_fns = [lambda x: x]
  37. # Define frequencies in either linear or log scale
  38. freq_bands: Tensor
  39. if log_space:
  40. freq_bands = 2.0 ** torch.linspace(0.0, num_freqs - 1, num_freqs)
  41. else:
  42. freq_bands = torch.linspace(2.0**0.0, 2.0 ** (num_freqs - 1), num_freqs)
  43. # Alternate sin and cos
  44. for freq in freq_bands:
  45. self._embed_fns.append(partial(_torch_sin, freq=freq))
  46. self._embed_fns.append(partial(_torch_cos, freq=freq))
  47. self._num_encoded_dims = self._num_dims * len(self._embed_fns)
  48. @property
  49. def num_encoded_dims(self) -> int:
  50. """Number of encoded dimensions."""
  51. return self._num_encoded_dims
  52. def forward(self, x: Tensor) -> Tensor:
  53. """Apply positional encoding to input.
  54. Args:
  55. x: Positionsl (or directional) tensor to encode: Tensor
  56. Returns:
  57. Tensor with encoded position/direction: Tensor
  58. """
  59. if x.ndim < 1:
  60. raise ValueError("Input tensor represents a scalar")
  61. if x.shape[-1] != self._num_dims:
  62. raise ValueError(
  63. f"Input tensor number of dimensions {x.shape[-1]} does not match instantiated dimensionality "
  64. f"{self._num_dims}"
  65. )
  66. return torch.cat([fn(x) for fn in self._embed_fns], dim=-1)