megadepth_pose_estimation_benchmark.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. import numpy as np
  2. import torch
  3. from romatch.utils import *
  4. from PIL import Image
  5. from tqdm import tqdm
  6. class MegaDepthPoseEstimationBenchmark:
  7. def __init__(self, data_root="data/megadepth", scene_names = None) -> None:
  8. if scene_names is None:
  9. self.scene_names = [
  10. "0015_0.1_0.3.npz",
  11. "0015_0.3_0.5.npz",
  12. "0022_0.1_0.3.npz",
  13. "0022_0.3_0.5.npz",
  14. "0022_0.5_0.7.npz",
  15. ]
  16. else:
  17. self.scene_names = scene_names
  18. self.scenes = [
  19. np.load(f"{data_root}/{scene}", allow_pickle=True)
  20. for scene in self.scene_names
  21. ]
  22. self.data_root = data_root
  23. def benchmark(self, model, model_name = None):
  24. with torch.no_grad():
  25. data_root = self.data_root
  26. tot_e_t, tot_e_R, tot_e_pose = [], [], []
  27. thresholds = [5, 10, 20]
  28. for scene_ind in range(len(self.scenes)):
  29. import os
  30. scene_name = os.path.splitext(self.scene_names[scene_ind])[0]
  31. scene = self.scenes[scene_ind]
  32. pairs = scene["pair_infos"]
  33. intrinsics = scene["intrinsics"]
  34. poses = scene["poses"]
  35. im_paths = scene["image_paths"]
  36. pair_inds = range(len(pairs))
  37. for pairind in (pbar := tqdm(pair_inds, desc = "Current AUC: ?")):
  38. idx1, idx2 = pairs[pairind][0]
  39. K1 = intrinsics[idx1].copy()
  40. T1 = poses[idx1].copy()
  41. R1, t1 = T1[:3, :3], T1[:3, 3]
  42. K2 = intrinsics[idx2].copy()
  43. T2 = poses[idx2].copy()
  44. R2, t2 = T2[:3, :3], T2[:3, 3]
  45. R, t = compute_relative_pose(R1, t1, R2, t2)
  46. T1_to_2 = np.concatenate((R,t[:,None]), axis=-1)
  47. im_A_path = f"{data_root}/{im_paths[idx1]}"
  48. im_B_path = f"{data_root}/{im_paths[idx2]}"
  49. dense_matches, dense_certainty = model.match(
  50. im_A_path, im_B_path, K1.copy(), K2.copy(), T1_to_2.copy()
  51. )
  52. im_A = Image.open(im_A_path)
  53. w1, h1 = im_A.size
  54. im_B = Image.open(im_B_path)
  55. w2, h2 = im_B.size
  56. if True: # Note: we keep this true as it was used in DKM/RoMa papers. There is very little difference compared to setting to False.
  57. scale1 = 1200 / max(w1, h1)
  58. scale2 = 1200 / max(w2, h2)
  59. w1, h1 = scale1 * w1, scale1 * h1
  60. w2, h2 = scale2 * w2, scale2 * h2
  61. K1, K2 = K1.copy(), K2.copy()
  62. K1[:2] = K1[:2] * scale1
  63. K2[:2] = K2[:2] * scale2
  64. for _ in range(5):
  65. sparse_matches,_ = model.sample(
  66. dense_matches, dense_certainty, 5_000
  67. )
  68. kpts1, kpts2 = model.to_pixel_coordinates(sparse_matches, h1, w1, h2, w2)
  69. kpts1, kpts2 = kpts1.cpu().numpy(), kpts2.cpu().numpy()
  70. shuffling = np.random.permutation(np.arange(len(kpts1)))
  71. kpts1 = kpts1[shuffling]
  72. kpts2 = kpts2[shuffling]
  73. try:
  74. threshold = 0.5
  75. norm_threshold = threshold / (np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2])))
  76. R_est, t_est, mask = estimate_pose(
  77. kpts1,
  78. kpts2,
  79. K1,
  80. K2,
  81. norm_threshold,
  82. conf=0.99999,
  83. )
  84. T1_to_2_est = np.concatenate((R_est, t_est), axis=-1) #
  85. e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
  86. e_pose = max(e_t, e_R)
  87. except Exception as e:
  88. print(repr(e))
  89. e_t, e_R = 90, 90
  90. e_pose = max(e_t, e_R)
  91. tot_e_t.append(e_t)
  92. tot_e_R.append(e_R)
  93. tot_e_pose.append(e_pose)
  94. pbar.set_description(f"Current AUC: {pose_auc(tot_e_pose, thresholds)}")
  95. tot_e_pose = np.array(tot_e_pose)
  96. auc = pose_auc(tot_e_pose, thresholds)
  97. acc_5 = (tot_e_pose < 5).mean()
  98. acc_10 = (tot_e_pose < 10).mean()
  99. acc_15 = (tot_e_pose < 15).mean()
  100. acc_20 = (tot_e_pose < 20).mean()
  101. map_5 = acc_5
  102. map_10 = np.mean([acc_5, acc_10])
  103. map_20 = np.mean([acc_5, acc_10, acc_15, acc_20])
  104. print(f"{model_name} auc: {auc}")
  105. return {
  106. "auc_5": auc[0],
  107. "auc_10": auc[1],
  108. "auc_20": auc[2],
  109. "map_5": map_5,
  110. "map_10": map_10,
  111. "map_20": map_20,
  112. }