| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143 |
- import os.path as osp
- import numpy as np
- import torch
- from romatch.utils import *
- from PIL import Image
- from tqdm import tqdm
class ScanNetBenchmark:
    """Relative-pose benchmark over the ScanNet test pairs listed in ``test.npz``.

    For every image pair the model produces matches, an essential-matrix pose
    is estimated from them several times (with reshuffled keypoints), and the
    resulting pose errors are summarised as AUC and mAP values.

    The helpers ``estimate_pose``, ``compute_pose_error`` and ``pose_auc`` are
    not defined here; they are expected to come from the module-level
    ``from romatch.utils import *``.
    """

    # Length (in pixels) the shorter image side is rescaled to before
    # keypoints and intrinsics are expressed in pixel units.
    _SHORT_SIDE = 480
    # Number of sparse matches requested from ``model.sample``.
    _NUM_SAMPLES = 5000
    # Number of pose estimations (each on a fresh shuffle) per image pair.
    _RUNS_PER_PAIR = 5
    # Error (translation and rotation) recorded when pose estimation fails.
    _FAILURE_ERROR = 90

    def __init__(self, data_root="data/scannet") -> None:
        """Store the dataset root.

        Args:
            data_root: Directory that contains ``test.npz`` and the
                ``scans_test/<scene>/{color,intrinsic}`` folders.
        """
        self.data_root = data_root

    @staticmethod
    def _load_intrinsics(path):
        """Parse a text file of whitespace-separated float rows into a 2-D array."""
        # The context manager closes the handle; blank / whitespace-only lines
        # (e.g. a trailing newline or stray "\r") are skipped.
        with open(path, "r") as handle:
            rows = [
                np.array([float(value) for value in line.split()])
                for line in handle.read().split("\n")
                if line.strip()
            ]
        return np.stack(rows)

    @staticmethod
    def _image_size(path):
        """Return ``(width, height)`` of the image at *path* and close the file."""
        with Image.open(path) as image:
            return image.size

    @staticmethod
    def _to_numpy(matches):
        """Return *matches* as a NumPy array, moving torch tensors to the CPU first."""
        if torch.is_tensor(matches):
            return matches.detach().cpu().numpy()
        return np.asarray(matches)

    @staticmethod
    def _to_pixel_coords(normalized, width, height, offset=0.5):
        """Map coordinates from the ``[-1, 1]`` range to pixel coordinates.

        ``-1`` maps to ``-offset`` and ``+1`` maps to ``width - offset``
        (respectively ``height - offset``).
        """
        return np.stack(
            (
                width * (normalized[:, 0] + 1) / 2 - offset,
                height * (normalized[:, 1] + 1) / 2 - offset,
            ),
            axis=-1,
        )

    @staticmethod
    def _accuracy_metrics(pose_errors):
        """Compute the mAP@5/10/20 values from an array of pose errors."""
        acc_5 = (pose_errors < 5).mean()
        acc_10 = (pose_errors < 10).mean()
        acc_15 = (pose_errors < 15).mean()
        acc_20 = (pose_errors < 20).mean()
        return {
            "map_5": acc_5,
            "map_10": np.mean([acc_5, acc_10]),
            "map_20": np.mean([acc_5, acc_10, acc_15, acc_20]),
        }

    def benchmark(self, model, model_name=None):
        """Run the benchmark and return the aggregated pose metrics.

        Args:
            model: Matcher exposing ``train(False)``, ``match(path_a, path_b)``
                and ``sample(dense_matches, dense_certainty, n)``. The sampled
                matches are indexed as ``[:, :2]`` (image A) and ``[:, 2:]``
                (image B) and treated as coordinates in ``[-1, 1]``.
            model_name: Unused; kept so existing callers keep working.

        Returns:
            dict with keys ``auc_5``, ``auc_10``, ``auc_20``, ``map_5``,
            ``map_10`` and ``map_20``.
        """
        model.train(False)

        # Materialise both arrays, then release the archive handle.
        with np.load(osp.join(self.data_root, "test.npz")) as archive:
            pairs, rel_pose = archive["name"], archive["rel_pose"]

        pose_errors = []
        intrinsics_by_scene = {}  # every pair of a scene shares one intrinsics file
        pair_inds = np.random.choice(
            range(len(pairs)), size=len(pairs), replace=False
        )

        with torch.no_grad():
            for pairind in tqdm(pair_inds, smoothing=0.9):
                scene = pairs[pairind]
                scene_name = f"scene0{scene[0]}_00"
                scene_dir = osp.join(self.data_root, "scans_test", scene_name)
                im_A_path = osp.join(scene_dir, "color", f"{scene[2]}.jpg")
                im_B_path = osp.join(scene_dir, "color", f"{scene[3]}.jpg")

                # Ground-truth relative pose stored as a flattened 3x4 [R | t].
                T_gt = rel_pose[pairind].reshape(3, 4)
                R, t = T_gt[:3, :3], T_gt[:3, 3]

                if scene_name not in intrinsics_by_scene:
                    intrinsics_by_scene[scene_name] = self._load_intrinsics(
                        osp.join(scene_dir, "intrinsic", "intrinsic_color.txt")
                    )
                K = intrinsics_by_scene[scene_name]

                w1, h1 = self._image_size(im_A_path)
                w2, h2 = self._image_size(im_B_path)

                dense_matches, dense_certainty = model.match(im_A_path, im_B_path)
                sparse_matches, _ = model.sample(
                    dense_matches, dense_certainty, self._NUM_SAMPLES
                )
                # np.stack below cannot consume tensors living on an accelerator.
                sparse_matches = self._to_numpy(sparse_matches)

                # Rescale so the shorter side is _SHORT_SIDE pixels; the
                # intrinsics are scaled by the same factor (this yields new
                # arrays, so the cached K is never modified).
                scale1 = self._SHORT_SIDE / min(w1, h1)
                scale2 = self._SHORT_SIDE / min(w2, h2)
                K1 = K * scale1
                K2 = K * scale2

                kpts1 = self._to_pixel_coords(
                    sparse_matches[:, :2], scale1 * w1, scale1 * h1
                )
                kpts2 = self._to_pixel_coords(
                    sparse_matches[:, 2:], scale2 * w2, scale2 * h2
                )

                # Half a pixel expressed in normalised camera units; constant
                # across the repeated runs, so computed once per pair.
                norm_threshold = 0.5 / (
                    np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2]))
                )

                for _run in range(self._RUNS_PER_PAIR):
                    shuffling = np.random.permutation(len(kpts1))
                    kpts1 = kpts1[shuffling]
                    kpts2 = kpts2[shuffling]
                    try:
                        R_est, t_est, _mask = estimate_pose(
                            kpts1,
                            kpts2,
                            K1,
                            K2,
                            norm_threshold,
                            conf=0.99999,
                        )
                        T1_to_2_est = np.concatenate((R_est, t_est), axis=-1)
                        e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
                    except Exception as e:
                        # Best effort: a failed estimation is scored as the
                        # failure error instead of aborting the benchmark.
                        print(repr(e))
                        e_t = e_R = self._FAILURE_ERROR
                    # Each run is recorded exactly once. The earlier code
                    # repeated the append statements, counting an error twice.
                    pose_errors.append(max(e_t, e_R))

        pose_errors = np.array(pose_errors)
        auc = pose_auc(pose_errors, [5, 10, 20])
        results = {
            "auc_5": auc[0],
            "auc_10": auc[1],
            "auc_20": auc[2],
        }
        results.update(self._accuracy_metrics(pose_errors))
        return results
|