2 лет назад · ca2615f357
--- a/experiments/eval_roma_outdoor.py
+++ b/experiments/eval_roma_outdoor.py
@@ -0,0 +1,57 @@
 
				+import os
			
 
				+import torch
			
 
				+from argparse import ArgumentParser
			
 
				+
			
 
				+from torch import nn
			
 
				+from torch.utils.data import ConcatDataset
			
 
				+import torch.distributed as dist
			
 
				+from torch.nn.parallel import DistributedDataParallel as DDP
			
 
				+import json
			
 
				+import wandb
			
 
				+
			
 
				+from roma.benchmarks import MegadepthDenseBenchmark
			
 
				+from roma.benchmarks import MegaDepthPoseEstimationBenchmark, MegadepthDenseBenchmark, HpatchesHomogBenchmark
			
 
				+
			
 
				+def test_mega_8_scenes(model, name):
			
 
				+    mega_8_scenes_benchmark = MegaDepthPoseEstimationBenchmark("data/megadepth",
			
 
				+                                                scene_names=['mega_8_scenes_0019_0.1_0.3.npz',
			
 
				+                                                    'mega_8_scenes_0025_0.1_0.3.npz',
			
 
				+                                                    'mega_8_scenes_0021_0.1_0.3.npz',
			
 
				+                                                    'mega_8_scenes_0008_0.1_0.3.npz',
			
 
				+                                                    'mega_8_scenes_0032_0.1_0.3.npz',
			
 
				+                                                    'mega_8_scenes_1589_0.1_0.3.npz',
			
 
				+                                                    'mega_8_scenes_0063_0.1_0.3.npz',
			
 
				+                                                    'mega_8_scenes_0024_0.1_0.3.npz',
			
 
				+                                                    'mega_8_scenes_0019_0.3_0.5.npz',
			
 
				+                                                    'mega_8_scenes_0025_0.3_0.5.npz',
			
 
				+                                                    'mega_8_scenes_0021_0.3_0.5.npz',
			
 
				+                                                    'mega_8_scenes_0008_0.3_0.5.npz',
			
 
				+                                                    'mega_8_scenes_0032_0.3_0.5.npz',
			
 
				+                                                    'mega_8_scenes_1589_0.3_0.5.npz',
			
 
				+                                                    'mega_8_scenes_0063_0.3_0.5.npz',
			
 
				+                                                    'mega_8_scenes_0024_0.3_0.5.npz'])
			
 
				+    mega_8_scenes_results = mega_8_scenes_benchmark.benchmark(model, model_name=name)
			
 
				+    print(mega_8_scenes_results)
			
 
				+    json.dump(mega_8_scenes_results, open(f"results/mega_8_scenes_{name}.json", "w"))
			
 
				+
			
 
				+def test_mega1500(model, name):
			
 
				+    mega1500_benchmark = MegaDepthPoseEstimationBenchmark("data/megadepth")
			
 
				+    mega1500_results = mega1500_benchmark.benchmark(model, model_name=name)
			
 
				+    json.dump(mega1500_results, open(f"results/mega1500_{name}.json", "w"))
			
 
				+
			
 
				+def test_mega_dense(model, name):
			
 
				+    megadense_benchmark = MegadepthDenseBenchmark("data/megadepth", num_samples = 1000)
			
 
				+    megadense_results = megadense_benchmark.benchmark(model)
			
 
				+    json.dump(megadense_results, open(f"results/mega_dense_{name}.json", "w"))
			
 
				+    
			
 
				+def test_hpatches(model, name):
			
 
				+    hpatches_benchmark = HpatchesHomogBenchmark("data/hpatches")
			
 
				+    hpatches_results = hpatches_benchmark.benchmark(model)
			
 
				+    json.dump(hpatches_results, open(f"results/hpatches_{name}.json", "w"))
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    from roma import roma_outdoor
			
 
				+    model = roma_outdoor(device = "cuda", coarse_res = 672, upsample_res = 1344)
			
 
				+    experiment_name = "roma_latest"
			
 
				+    test_mega1500(model, experiment_name)
			
--- a/experiments/train_roma_outdoor.py
+++ b/experiments/train_roma_outdoor.py
@@ -250,7 +250,7 @@ def train(args):
 
				         checkpointer.save(model, optimizer, lr_scheduler, roma.GLOBAL_STEP)
			
 
				         wandb.log(megadense_benchmark.benchmark(model), step = roma.GLOBAL_STEP)
			
 
				 
			
 
				-def test_mega_8_scenes(model, name, resolution, sample_mode):
			
 
				+def test_mega_8_scenes(model, name):
			
 
				     mega_8_scenes_benchmark = MegaDepthPoseEstimationBenchmark("data/megadepth",
			
 
				                                                 scene_names=['mega_8_scenes_0019_0.1_0.3.npz',
			
 
				                                                     'mega_8_scenes_0025_0.1_0.3.npz',
			
@@ -268,21 +268,21 @@ def test_mega_8_scenes(model, name, resolution, sample_mode):
 
				                                                     'mega_8_scenes_1589_0.3_0.5.npz',
			
 
				                                                     'mega_8_scenes_0063_0.3_0.5.npz',
			
 
				                                                     'mega_8_scenes_0024_0.3_0.5.npz'])
			
 
				-    mega_8_scenes_results = mega_8_scenes_benchmark.benchmark(model, model_name=name, scale_intrinsics = False)
			
 
				+    mega_8_scenes_results = mega_8_scenes_benchmark.benchmark(model, model_name=name)
			
 
				     print(mega_8_scenes_results)
			
 
				     json.dump(mega_8_scenes_results, open(f"results/mega_8_scenes_{name}.json", "w"))
			
 
				 
			
 
				-def test_mega1500(model, name, resolution, sample_mode):
			
 
				+def test_mega1500(model, name):
			
 
				     mega1500_benchmark = MegaDepthPoseEstimationBenchmark("data/megadepth")
			
 
				     mega1500_results = mega1500_benchmark.benchmark(model, model_name=name)
			
 
				     json.dump(mega1500_results, open(f"results/mega1500_{name}.json", "w"))
			
 
				 
			
 
				-def test_mega_dense(model, name, resolution, sample_mode):
			
 
				+def test_mega_dense(model, name):
			
 
				     megadense_benchmark = MegadepthDenseBenchmark("data/megadepth", num_samples = 1000)
			
 
				     megadense_results = megadense_benchmark.benchmark(model)
			
 
				     json.dump(megadense_results, open(f"results/mega_dense_{name}.json", "w"))
			
 
				     
			
 
				-def test_hpatches(model, name, resolution, sample_mode):
			
 
				+def test_hpatches(model, name):
			
 
				     hpatches_benchmark = HpatchesHomogBenchmark("data/hpatches")
			
 
				     hpatches_results = hpatches_benchmark.benchmark(model)
			
 
				     json.dump(hpatches_results, open(f"results/hpatches_{name}.json", "w"))
			
@@ -306,18 +306,3 @@ if __name__ == "__main__":
 
				     roma.DEBUG_MODE = args.debug_mode
			
 
				     if not args.only_test:
			
 
				         train(args)
			
 
				-    experiment_name = os.path.splitext(os.path.basename(__file__))[0]
			
 
				-    checkpoint_dir = "workspace/checkpoints/"
			
 
				-    checkpoint_name = checkpoint_dir + experiment_name + ".pth"
			
 
				-    
			
 
				-    test_resolution = "high"
			
 
				-    sample_mode = "threshold_balanced"
			
 
				-    symmetric = True
			
 
				-    upsample_preds = True
			
 
				-    attenuate_cert = True
			
 
				-
			
 
				-    model = get_model(pretrained_backbone=False, resolution = test_resolution, sample_mode = sample_mode, upsample_preds = upsample_preds, symmetric=symmetric, name=experiment_name, attenuate_cert = attenuate_cert)
			
 
				-    model = model.cuda()
			
 
				-    weights = torch.load(checkpoint_name)
			
 
				-    model.load_state_dict(weights)
			
 
				-    test_mega1500(model, experiment_name, resolution = test_resolution, sample_mode = sample_mode)
			
--- a/roma/benchmarks/megadepth_pose_estimation_benchmark.py
+++ b/roma/benchmarks/megadepth_pose_estimation_benchmark.py
@@ -25,8 +25,7 @@ class MegaDepthPoseEstimationBenchmark:
 
				         ]
			
 
				         self.data_root = data_root
			
 
				 
			
 
				-    def benchmark(self, model, model_name = None, resolution = None, scale_intrinsics = True, calibrated = True):
			
 
				-        H,W = model.get_output_resolution()
			
 
				+    def benchmark(self, model, model_name = None):
			
 
				         with torch.no_grad():
			
 
				             data_root = self.data_root
			
 
				             tot_e_t, tot_e_R, tot_e_pose = [], [], []
			
@@ -56,15 +55,14 @@ class MegaDepthPoseEstimationBenchmark:
 
				                         im_A_path, im_B_path, K1.copy(), K2.copy(), T1_to_2.copy()
			
 
				                     )
			
 
				                     sparse_matches,_ = model.sample(
			
 
				-                        dense_matches, dense_certainty, 5000
			
 
				+                        dense_matches, dense_certainty, 5_000
			
 
				                     )
			
 
				                     
			
 
				                     im_A = Image.open(im_A_path)
			
 
				                     w1, h1 = im_A.size
			
 
				                     im_B = Image.open(im_B_path)
			
 
				                     w2, h2 = im_B.size
			
 
				-
			
 
				-                    if scale_intrinsics:
			
 
				+                    if True: # Note: we keep this true as it was used in DKM/RoMa papers. There is very little difference compared to setting to False. 
			
 
				                         scale1 = 1200 / max(w1, h1)
			
 
				                         scale2 = 1200 / max(w2, h2)
			
 
				                         w1, h1 = scale1 * w1, scale1 * h1
			
@@ -73,43 +71,23 @@ class MegaDepthPoseEstimationBenchmark:
 
				                         K1[:2] = K1[:2] * scale1
			
 
				                         K2[:2] = K2[:2] * scale2
			
 
				 
			
 
				-                    kpts1 = sparse_matches[:, :2]
			
 
				-                    kpts1 = (
			
 
				-                        np.stack(
			
 
				-                            (
			
 
				-                                w1 * (kpts1[:, 0] + 1) / 2,
			
 
				-                                h1 * (kpts1[:, 1] + 1) / 2,
			
 
				-                            ),
			
 
				-                            axis=-1,
			
 
				-                        )
			
 
				-                    )
			
 
				-                    kpts2 = sparse_matches[:, 2:]
			
 
				-                    kpts2 = (
			
 
				-                        np.stack(
			
 
				-                            (
			
 
				-                                w2 * (kpts2[:, 0] + 1) / 2,
			
 
				-                                h2 * (kpts2[:, 1] + 1) / 2,
			
 
				-                            ),
			
 
				-                            axis=-1,
			
 
				-                        )
			
 
				-                    )
			
 
				-
			
 
				+                    kpts1, kpts2 = model.to_pixel_coordinates(sparse_matches, h1, w1, h2, w2)
			
 
				+                    kpts1, kpts2 = kpts1.cpu().numpy(), kpts2.cpu().numpy()
			
 
				                     for _ in range(5):
			
 
				                         shuffling = np.random.permutation(np.arange(len(kpts1)))
			
 
				                         kpts1 = kpts1[shuffling]
			
 
				                         kpts2 = kpts2[shuffling]
			
 
				                         try:
			
 
				                             threshold = 0.5 
			
 
				-                            if calibrated:
			
 
				-                                norm_threshold = threshold / (np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2])))
			
 
				-                                R_est, t_est, mask = estimate_pose(
			
 
				-                                    kpts1,
			
 
				-                                    kpts2,
			
 
				-                                    K1,
			
 
				-                                    K2,
			
 
				-                                    norm_threshold,
			
 
				-                                    conf=0.99999,
			
 
				-                                )
			
 
				+                            norm_threshold = threshold / (np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2])))
			
 
				+                            R_est, t_est, mask = estimate_pose(
			
 
				+                                kpts1,
			
 
				+                                kpts2,
			
 
				+                                K1,
			
 
				+                                K2,
			
 
				+                                norm_threshold,
			
 
				+                                conf=0.99999,
			
 
				+                            )
			
 
				                             T1_to_2_est = np.concatenate((R_est, t_est), axis=-1)  #
			
 
				                             e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
			
 
				                             e_pose = max(e_t, e_R)
			
--- a/roma/models/matcher.py
+++ b/roma/models/matcher.py
@@ -424,7 +424,7 @@ class RegressionMatcher(nn.Module):
 
				         decoder,
			
 
				         h=448,
			
 
				         w=448,
			
 
				-        sample_mode = "threshold",
			
 
				+        sample_mode = "threshold_balanced",
			
 
				         upsample_preds = False,
			
 
				         symmetric = False,
			
 
				         name = None,
			
--- a/roma/models/model_zoo/roma_models.py
+++ b/roma/models/model_zoo/roma_models.py
@@ -6,8 +6,8 @@ from roma.models.encoders import *
 
				 
			
 
				 def roma_model(resolution, upsample_preds, device = None, weights=None, dinov2_weights=None, **kwargs):
			
 
				     # roma weights and dinov2 weights are loaded separately, as dinov2 weights are not parameters
			
 
				-    torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul
			
 
				-    torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn
			
 
				+    #torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul TODO: these probably ruin stuff, should be careful
			
 
				+    #torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn
			
 
				     warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
			
 
				     gp_dim = 512
			
 
				     feat_dim = 512
			
@@ -151,7 +151,8 @@ def roma_model(resolution, upsample_preds, device = None, weights=None, dinov2_w
 
				     h,w = resolution
			
 
				     symmetric = True
			
 
				     attenuate_cert = True
			
 
				+    sample_mode = "threshold_balanced"
			
 
				     matcher = RegressionMatcher(encoder, decoder, h=h, w=w, upsample_preds=upsample_preds, 
			
 
				-                                symmetric = symmetric, attenuate_cert=attenuate_cert, **kwargs).to(device)
			
 
				+                                symmetric = symmetric, attenuate_cert = attenuate_cert, sample_mode = sample_mode, **kwargs).to(device)
			
 
				     matcher.load_state_dict(weights)
			
 
				     return matcher