camera_utils.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. # LICENSE HEADER MANAGED BY add-license-header
  2. #
  3. # Copyright 2018 Kornia Team
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. import math
  18. from typing import List, Tuple, Union
  19. import torch
  20. from kornia.core import Device, Tensor, cos, sin, stack
  21. from kornia.geometry.camera import PinholeCamera
  22. from kornia.geometry.conversions import quaternion_to_rotation_matrix
  23. def parse_colmap_output(
  24. cameras_path: str, images_path: str, device: Device, dtype: torch.dtype
  25. ) -> Tuple[List[str], PinholeCamera]:
  26. r"""Parse colmap output to create an PinholeCamera for aligned scene cameras.
  27. Args:
  28. cameras_path: Path to camera.txt Colmap file with camera intrinsics: str
  29. images_path: Path to images.txt Colmap file with camera extrinsics for each image: str
  30. device: device for created camera object: Union[str, torch.device]
  31. dtype: Intrinsics and extrinsics dtype.
  32. Returns:
  33. image names: List[str]
  34. scene camera object: PinholeCamera
  35. """
  36. # Parse camera intrinsics
  37. with open(cameras_path) as f:
  38. lines = [line.strip() for line in f if not line.startswith("#")]
  39. class CameraParams:
  40. def __init__(self, line: str) -> None:
  41. split_line = line.split(" ")
  42. if len(split_line) < 7:
  43. raise ValueError(f"Invalid camera line: {line}")
  44. model = split_line[1]
  45. if model == "SIMPLE_PINHOLE":
  46. self.width = int(split_line[2])
  47. self.height = int(split_line[3])
  48. self.fx = float(split_line[4])
  49. self.fy = self.fx
  50. self.cx = float(split_line[5])
  51. self.cy = float(split_line[6])
  52. elif model == "PINHOLE":
  53. if len(split_line) < 8:
  54. raise ValueError(f"Invalid PINHOLE camera line: {line}")
  55. self.width = int(split_line[2])
  56. self.height = int(split_line[3])
  57. self.fx = float(split_line[4])
  58. self.fy = float(split_line[5])
  59. self.cx = float(split_line[6])
  60. self.cy = float(split_line[7])
  61. else:
  62. raise ValueError(f"Unsupported camera model: {model}")
  63. cameras_params: List[CameraParams] = [CameraParams(line) for line in lines]
  64. with open(images_path) as f:
  65. lines = [
  66. stripped
  67. for stripped in (line.strip() for line in f if not line.startswith("#"))
  68. if stripped.endswith(("jpg", "png"))
  69. ]
  70. num_images = len(lines)
  71. if num_images == 0:
  72. raise ValueError("No valid images found in images.txt")
  73. img_names: List[str] = []
  74. camera_inds: List[int] = []
  75. quats_list: List[List[float]] = []
  76. ts_list: List[List[float]] = []
  77. for line in lines:
  78. split_line = line.split(" ")
  79. if len(split_line) < 10:
  80. raise ValueError(f"Invalid image line: {line}")
  81. qw, qx, qy, qz = map(float, split_line[1:5])
  82. tx, ty, tz = map(float, split_line[5:8])
  83. camera_ind = int(split_line[8]) - 1
  84. img_name = split_line[9]
  85. if camera_ind >= len(cameras_params):
  86. raise ValueError(f"Invalid camera index {camera_ind + 1} for image {img_name}")
  87. img_names.append(img_name)
  88. camera_inds.append(camera_ind)
  89. quats_list.append([qw, qx, qy, qz])
  90. ts_list.append([tx, ty, tz])
  91. quats = torch.tensor(quats_list, device=device, dtype=dtype)
  92. ts = torch.tensor(ts_list, device=device, dtype=dtype)
  93. Rs = quaternion_to_rotation_matrix(quats)
  94. extrinsics = torch.eye(4, device=device, dtype=dtype).unsqueeze(0).repeat(num_images, 1, 1)
  95. extrinsics[:, :3, :3] = Rs
  96. extrinsics[:, :3, 3] = ts
  97. fxs = torch.tensor([cameras_params[i].fx for i in camera_inds], device=device, dtype=dtype)
  98. fys = torch.tensor([cameras_params[i].fy for i in camera_inds], device=device, dtype=dtype)
  99. cxs = torch.tensor([cameras_params[i].cx for i in camera_inds], device=device, dtype=dtype)
  100. cys = torch.tensor([cameras_params[i].cy for i in camera_inds], device=device, dtype=dtype)
  101. intrinsics = torch.eye(4, device=device, dtype=dtype).unsqueeze(0).repeat(num_images, 1, 1)
  102. intrinsics[:, 0, 0] = fxs
  103. intrinsics[:, 1, 1] = fys
  104. intrinsics[:, 0, 2] = cxs
  105. intrinsics[:, 1, 2] = cys
  106. heights = torch.tensor([cameras_params[i].height for i in camera_inds], device=device)
  107. widths = torch.tensor([cameras_params[i].width for i in camera_inds], device=device)
  108. cameras = PinholeCamera(
  109. intrinsics,
  110. extrinsics,
  111. heights,
  112. widths,
  113. )
  114. return img_names, cameras
  115. def cameras_for_ids(cameras: PinholeCamera, camera_ids: Union[List[int], Tensor]) -> PinholeCamera:
  116. r"""Take a PinholeCamera camera and camera indices to create a new PinholeCamera for requested cameras.
  117. Args:
  118. cameras: Scene camera object: PinholeCamera
  119. camera_ids: List of camera indices to copy: List[int]
  120. Return:
  121. A new PinholeCamera object with a sub-set of cameras: PinholeCamera
  122. """
  123. intrinsics = cameras.intrinsics[camera_ids]
  124. extrinsics = cameras.extrinsics[camera_ids]
  125. height = cameras.height[camera_ids]
  126. width = cameras.width[camera_ids]
  127. return PinholeCamera(intrinsics, extrinsics, height, width)
  128. def create_spiral_path(cameras: PinholeCamera, rad: float, num_views: int, num_circles: int) -> PinholeCamera:
  129. r"""Create a PinholeCamera object with cameras that follow a spiral path.
  130. Used for novel view synthesis for face facing models.
  131. Args:
  132. cameras: Scene cameras used to train the NeRF model: PinholeCamera
  133. rad: Spiral radius: float
  134. num_views: Number of created cameras: int
  135. num_circles: Number of spiral circles: int
  136. """
  137. # Average locations over all cameras
  138. mean_center = cameras.translation_vector.mean(0, False).squeeze(-1)
  139. device = cameras.intrinsics.device
  140. t = torch.linspace(0, 2 * math.pi * num_circles, num_views, device=device)
  141. cos_t = cos(t) * rad
  142. sin_t = -sin(t) * rad
  143. sin_05t = -sin(0.5 * t) * rad
  144. translation_vector = torch.unsqueeze(mean_center, dim=0) + stack((cos_t, sin_t, sin_05t)).permute((1, 0))
  145. mean_intrinsics = cameras.intrinsics.mean(0, True).repeat(num_views, 1, 1)
  146. mean_extrinsics = cameras.extrinsics.mean(0, True).repeat(num_views, 1, 1)
  147. extrinsics = mean_extrinsics
  148. extrinsics[:, :3, 3] = translation_vector
  149. height = torch.tensor([cameras.height[0]] * num_views, device=device)
  150. width = torch.tensor([cameras.width[0]] * num_views, device=device)
  151. return PinholeCamera(mean_intrinsics, extrinsics, height, width)