Просмотр исходного кода

Merge pull request #20 from Parskatt/dev

Dev
Johan Edstedt 2 года назад
Родитель
Коммит
ca2615f357

+ 57 - 0
experiments/eval_roma_outdoor.py

@@ -0,0 +1,57 @@
+import os
+import torch
+from argparse import ArgumentParser
+
+from torch import nn
+from torch.utils.data import ConcatDataset
+import torch.distributed as dist
+from torch.nn.parallel import DistributedDataParallel as DDP
+import json
+import wandb
+
+from roma.benchmarks import MegadepthDenseBenchmark
+from roma.benchmarks import MegaDepthPoseEstimationBenchmark, MegadepthDenseBenchmark, HpatchesHomogBenchmark
+
+def test_mega_8_scenes(model, name):
+    """Benchmark ``model`` on the 16 ``mega_8_scenes`` files and save the results.
+
+    Builds a ``MegaDepthPoseEstimationBenchmark`` rooted at ``data/megadepth``
+    restricted to eight scene ids, each with the ``0.1_0.3`` and ``0.3_0.5``
+    suffix, runs it, prints the returned results and writes them as JSON to
+    ``results/mega_8_scenes_{name}.json``.
+
+    Args:
+        model: Model object handed to ``benchmark``.
+        name: Experiment name; forwarded as ``model_name`` and used in the
+            output file name.
+    """
+    scene_ids = ("0019", "0025", "0021", "0008", "0032", "1589", "0063", "0024")
+    suffixes = ("0.1_0.3", "0.3_0.5")
+    # Expands to 16 file names: every scene id with the first suffix,
+    # then every scene id with the second suffix.
+    scene_names = [
+        f"mega_8_scenes_{scene_id}_{suffix}.npz"
+        for suffix in suffixes
+        for scene_id in scene_ids
+    ]
+    mega_8_scenes_benchmark = MegaDepthPoseEstimationBenchmark(
+        "data/megadepth", scene_names=scene_names
+    )
+    mega_8_scenes_results = mega_8_scenes_benchmark.benchmark(model, model_name=name)
+    print(mega_8_scenes_results)
+    # Make sure the output folder exists so a finished run is not lost at save time.
+    os.makedirs("results", exist_ok=True)
+    # The context manager flushes and closes the file instead of leaking the handle.
+    with open(f"results/mega_8_scenes_{name}.json", "w") as f:
+        json.dump(mega_8_scenes_results, f)
+
+def test_mega1500(model, name):
+    """Run the default MegaDepth pose-estimation benchmark and save the results.
+
+    Builds a ``MegaDepthPoseEstimationBenchmark`` rooted at ``data/megadepth``
+    with its default scene selection, runs it and writes the returned results
+    as JSON to ``results/mega1500_{name}.json``.
+
+    Args:
+        model: Model object handed to ``benchmark``.
+        name: Experiment name; forwarded as ``model_name`` and used in the
+            output file name.
+    """
+    mega1500_benchmark = MegaDepthPoseEstimationBenchmark("data/megadepth")
+    mega1500_results = mega1500_benchmark.benchmark(model, model_name=name)
+    # Make sure the output folder exists so a finished run is not lost at save time.
+    os.makedirs("results", exist_ok=True)
+    # The context manager flushes and closes the file instead of leaking the handle.
+    with open(f"results/mega1500_{name}.json", "w") as f:
+        json.dump(mega1500_results, f)
+
+def test_mega_dense(model, name):
+    """Run the MegaDepth dense benchmark and save the results.
+
+    Builds a ``MegadepthDenseBenchmark`` rooted at ``data/megadepth`` with
+    ``num_samples=1000``, runs it and writes the returned results as JSON to
+    ``results/mega_dense_{name}.json``.
+
+    Args:
+        model: Model object handed to ``benchmark``.
+        name: Experiment name; only used in the output file name.
+    """
+    megadense_benchmark = MegadepthDenseBenchmark("data/megadepth", num_samples=1000)
+    megadense_results = megadense_benchmark.benchmark(model)
+    # Make sure the output folder exists so a finished run is not lost at save time.
+    os.makedirs("results", exist_ok=True)
+    # The context manager flushes and closes the file instead of leaking the handle.
+    with open(f"results/mega_dense_{name}.json", "w") as f:
+        json.dump(megadense_results, f)
+    
+def test_hpatches(model, name):
+    """Run the HPatches homography benchmark and save the results.
+
+    Builds a ``HpatchesHomogBenchmark`` rooted at ``data/hpatches``, runs it
+    and writes the returned results as JSON to ``results/hpatches_{name}.json``.
+
+    Args:
+        model: Model object handed to ``benchmark``.
+        name: Experiment name; only used in the output file name.
+    """
+    hpatches_benchmark = HpatchesHomogBenchmark("data/hpatches")
+    hpatches_results = hpatches_benchmark.benchmark(model)
+    # Make sure the output folder exists so a finished run is not lost at save time.
+    os.makedirs("results", exist_ok=True)
+    # The context manager flushes and closes the file instead of leaking the handle.
+    with open(f"results/hpatches_{name}.json", "w") as f:
+        json.dump(hpatches_results, f)
+
+
+if __name__ == "__main__":
+    # Script entry point: build the model and run test_mega1500 on it.
+    from roma import roma_outdoor
+
+    experiment_name = "roma_latest"
+    # Construction settings passed to roma_outdoor for this evaluation run.
+    model_kwargs = dict(device="cuda", coarse_res=672, upsample_res=1344)
+    model = roma_outdoor(**model_kwargs)
+    test_mega1500(model, experiment_name)

+ 5 - 20
experiments/roma_outdoor.py → experiments/train_roma_outdoor.py

@@ -250,7 +250,7 @@ def train(args):
         checkpointer.save(model, optimizer, lr_scheduler, roma.GLOBAL_STEP)
         wandb.log(megadense_benchmark.benchmark(model), step = roma.GLOBAL_STEP)
 
-def test_mega_8_scenes(model, name, resolution, sample_mode):
+def test_mega_8_scenes(model, name):
     mega_8_scenes_benchmark = MegaDepthPoseEstimationBenchmark("data/megadepth",
                                                 scene_names=['mega_8_scenes_0019_0.1_0.3.npz',
                                                     'mega_8_scenes_0025_0.1_0.3.npz',
@@ -268,21 +268,21 @@ def test_mega_8_scenes(model, name, resolution, sample_mode):
                                                     'mega_8_scenes_1589_0.3_0.5.npz',
                                                     'mega_8_scenes_0063_0.3_0.5.npz',
                                                     'mega_8_scenes_0024_0.3_0.5.npz'])
-    mega_8_scenes_results = mega_8_scenes_benchmark.benchmark(model, model_name=name, scale_intrinsics = False)
+    mega_8_scenes_results = mega_8_scenes_benchmark.benchmark(model, model_name=name)
     print(mega_8_scenes_results)
     json.dump(mega_8_scenes_results, open(f"results/mega_8_scenes_{name}.json", "w"))
 
-def test_mega1500(model, name, resolution, sample_mode):
+def test_mega1500(model, name):
     mega1500_benchmark = MegaDepthPoseEstimationBenchmark("data/megadepth")
     mega1500_results = mega1500_benchmark.benchmark(model, model_name=name)
     json.dump(mega1500_results, open(f"results/mega1500_{name}.json", "w"))
 
-def test_mega_dense(model, name, resolution, sample_mode):
+def test_mega_dense(model, name):
     megadense_benchmark = MegadepthDenseBenchmark("data/megadepth", num_samples = 1000)
     megadense_results = megadense_benchmark.benchmark(model)
     json.dump(megadense_results, open(f"results/mega_dense_{name}.json", "w"))
     
-def test_hpatches(model, name, resolution, sample_mode):
+def test_hpatches(model, name):
     hpatches_benchmark = HpatchesHomogBenchmark("data/hpatches")
     hpatches_results = hpatches_benchmark.benchmark(model)
     json.dump(hpatches_results, open(f"results/hpatches_{name}.json", "w"))
@@ -306,18 +306,3 @@ if __name__ == "__main__":
     roma.DEBUG_MODE = args.debug_mode
     if not args.only_test:
         train(args)
-    experiment_name = os.path.splitext(os.path.basename(__file__))[0]
-    checkpoint_dir = "workspace/checkpoints/"
-    checkpoint_name = checkpoint_dir + experiment_name + ".pth"
-    
-    test_resolution = "high"
-    sample_mode = "threshold_balanced"
-    symmetric = True
-    upsample_preds = True
-    attenuate_cert = True
-
-    model = get_model(pretrained_backbone=False, resolution = test_resolution, sample_mode = sample_mode, upsample_preds = upsample_preds, symmetric=symmetric, name=experiment_name, attenuate_cert = attenuate_cert)
-    model = model.cuda()
-    weights = torch.load(checkpoint_name)
-    model.load_state_dict(weights)
-    test_mega1500(model, experiment_name, resolution = test_resolution, sample_mode = sample_mode)

+ 14 - 36
roma/benchmarks/megadepth_pose_estimation_benchmark.py

@@ -25,8 +25,7 @@ class MegaDepthPoseEstimationBenchmark:
         ]
         self.data_root = data_root
 
-    def benchmark(self, model, model_name = None, resolution = None, scale_intrinsics = True, calibrated = True):
-        H,W = model.get_output_resolution()
+    def benchmark(self, model, model_name = None):
         with torch.no_grad():
             data_root = self.data_root
             tot_e_t, tot_e_R, tot_e_pose = [], [], []
@@ -56,15 +55,14 @@ class MegaDepthPoseEstimationBenchmark:
                         im_A_path, im_B_path, K1.copy(), K2.copy(), T1_to_2.copy()
                     )
                     sparse_matches,_ = model.sample(
-                        dense_matches, dense_certainty, 5000
+                        dense_matches, dense_certainty, 5_000
                     )
                     
                     im_A = Image.open(im_A_path)
                     w1, h1 = im_A.size
                     im_B = Image.open(im_B_path)
                     w2, h2 = im_B.size
-
-                    if scale_intrinsics:
+                    if True: # Note: we keep this true as it was used in DKM/RoMa papers. There is very little difference compared to setting to False. 
                         scale1 = 1200 / max(w1, h1)
                         scale2 = 1200 / max(w2, h2)
                         w1, h1 = scale1 * w1, scale1 * h1
@@ -73,43 +71,23 @@ class MegaDepthPoseEstimationBenchmark:
                         K1[:2] = K1[:2] * scale1
                         K2[:2] = K2[:2] * scale2
 
-                    kpts1 = sparse_matches[:, :2]
-                    kpts1 = (
-                        np.stack(
-                            (
-                                w1 * (kpts1[:, 0] + 1) / 2,
-                                h1 * (kpts1[:, 1] + 1) / 2,
-                            ),
-                            axis=-1,
-                        )
-                    )
-                    kpts2 = sparse_matches[:, 2:]
-                    kpts2 = (
-                        np.stack(
-                            (
-                                w2 * (kpts2[:, 0] + 1) / 2,
-                                h2 * (kpts2[:, 1] + 1) / 2,
-                            ),
-                            axis=-1,
-                        )
-                    )
-
+                    kpts1, kpts2 = model.to_pixel_coordinates(sparse_matches, h1, w1, h2, w2)
+                    kpts1, kpts2 = kpts1.cpu().numpy(), kpts2.cpu().numpy()
                     for _ in range(5):
                         shuffling = np.random.permutation(np.arange(len(kpts1)))
                         kpts1 = kpts1[shuffling]
                         kpts2 = kpts2[shuffling]
                         try:
                             threshold = 0.5 
-                            if calibrated:
-                                norm_threshold = threshold / (np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2])))
-                                R_est, t_est, mask = estimate_pose(
-                                    kpts1,
-                                    kpts2,
-                                    K1,
-                                    K2,
-                                    norm_threshold,
-                                    conf=0.99999,
-                                )
+                            norm_threshold = threshold / (np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2])))
+                            R_est, t_est, mask = estimate_pose(
+                                kpts1,
+                                kpts2,
+                                K1,
+                                K2,
+                                norm_threshold,
+                                conf=0.99999,
+                            )
                             T1_to_2_est = np.concatenate((R_est, t_est), axis=-1)  #
                             e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
                             e_pose = max(e_t, e_R)

+ 1 - 1
roma/models/matcher.py

@@ -424,7 +424,7 @@ class RegressionMatcher(nn.Module):
         decoder,
         h=448,
         w=448,
-        sample_mode = "threshold",
+        sample_mode = "threshold_balanced",
         upsample_preds = False,
         symmetric = False,
         name = None,

+ 4 - 3
roma/models/model_zoo/roma_models.py

@@ -6,8 +6,8 @@ from roma.models.encoders import *
 
 def roma_model(resolution, upsample_preds, device = None, weights=None, dinov2_weights=None, **kwargs):
     # roma weights and dinov2 weights are loaded seperately, as dinov2 weights are not parameters
-    torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul
-    torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn
+    #torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul TODO: these probably ruin stuff, should be careful
+    #torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn
     warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
     gp_dim = 512
     feat_dim = 512
@@ -151,7 +151,8 @@ def roma_model(resolution, upsample_preds, device = None, weights=None, dinov2_w
     h,w = resolution
     symmetric = True
     attenuate_cert = True
+    sample_mode = "threshold_balanced"
     matcher = RegressionMatcher(encoder, decoder, h=h, w=w, upsample_preds=upsample_preds, 
-                                symmetric = symmetric, attenuate_cert=attenuate_cert, **kwargs).to(device)
+                                symmetric = symmetric, attenuate_cert = attenuate_cert, sample_mode = sample_mode, **kwargs).to(device)
     matcher.load_state_dict(weights)
     return matcher