| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240 |
# Banner printed once at start-up, before the argparse help text.
front_matter = """
------------------------------------------------------------------------
Online demo for [LoFTR](https://zju3dv.github.io/loftr/).
This demo is heavily inspired by [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork/).
We thank the authors for their excellent work.
------------------------------------------------------------------------
"""
import os
import argparse
from pathlib import Path
import cv2
import torch
import numpy as np
import matplotlib.cm as cm

# The project packages (`src`, `demo`) are resolved through the parent of the
# current working directory, so this must run before the imports below.
os.sys.path.append("../")  # Add the project directory
from src.loftr import LoFTR, default_cfg
from src.config.default import get_cfg_defaults

try:
    from demo.utils import (AverageTimer, VideoStreamer,
                            make_matching_plot_fast, make_matching_plot, frame2tensor)
except ImportError as err:
    # Catch only import failures (a bare `except` would also swallow
    # KeyboardInterrupt/SystemExit) and keep the original cause attached.
    raise ImportError(
        "This demo requires utils.py from SuperGlue, please use run_demo.sh to start this script."
    ) from err

# The demo only runs inference, so autograd is switched off globally.
torch.set_grad_enabled(False)
def build_parser():
    """Create the command-line parser for the LoFTR online demo.

    Returns:
        argparse.ArgumentParser: parser exposing every demo option. Defaults
        are shown in ``--help`` through ``ArgumentDefaultsHelpFormatter``.
    """
    parser = argparse.ArgumentParser(
        description='LoFTR online demo',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # The checkpoint is loaded unconditionally, so a missing path is reported
    # by argparse instead of failing later inside ``torch.load``.
    parser.add_argument(
        '--weight', type=str, required=True, help="Path to the checkpoint.")
    parser.add_argument(
        '--input', type=str, default='0',
        help='ID of a USB webcam, URL of an IP camera, '
             'or path to an image directory or movie file')
    parser.add_argument(
        '--output_dir', type=str, default=None,
        help='Directory where to write output frames (If None, no output)')
    parser.add_argument(
        '--image_glob', type=str, nargs='+', default=['*.png', '*.jpg', '*.jpeg'],
        help='Glob if a directory of images is specified')
    parser.add_argument(
        '--skip', type=int, default=1,
        help='Images to skip if input is a movie or directory')
    parser.add_argument(
        '--max_length', type=int, default=1000000,
        help='Maximum length if input is a movie or directory')
    parser.add_argument(
        '--resize', type=int, nargs='+', default=[640, 480],
        help='Resize the input image before running inference. If two numbers, '
             'resize to the exact dimensions, if one number, resize the max '
             'dimension, if -1, do not resize')
    parser.add_argument(
        '--no_display', action='store_true',
        help='Do not display images to screen. Useful if running remotely')
    parser.add_argument(
        '--save_video', action='store_true',
        help='Save output (with match visualizations) to a video.')
    parser.add_argument(
        '--save_input', action='store_true',
        help='Save the input images to a video (for gathering repeatable input source).')
    parser.add_argument(
        '--skip_frames', type=int, default=1,
        help="Skip frames from webcam input.")
    parser.add_argument(
        '--top_k', type=int, default=2000, help="The max vis_range (please refer to the code).")
    parser.add_argument(
        '--bottom_k', type=int, default=0, help="The min vis_range (please refer to the code).")
    return parser


def normalize_resize(resize):
    """Validate ``--resize`` and report the resulting resize policy.

    Args:
        resize: list of one or two integers as parsed from ``--resize``.

    Returns:
        list: ``[W, H]`` for an exact resize, or a single-element list
        (positive: resize the max dimension, otherwise: no resize). A second
        value of ``-1`` is dropped so ``[W, -1]`` becomes ``[W]``.

    Raises:
        ValueError: if the list does not hold one or two integers.
    """
    resize = list(resize)
    if len(resize) == 2 and resize[1] == -1:
        resize = resize[:1]

    if len(resize) == 2:
        print('Will resize to {}x{} (WxH)'.format(resize[0], resize[1]))
    elif len(resize) == 1 and resize[0] > 0:
        print('Will resize max dimension to {}'.format(resize[0]))
    elif len(resize) == 1:
        print('Will not resize images')
    else:
        raise ValueError('Cannot specify more than two integers for --resize')
    return resize


def adjust_vis_range(vis_range, key, shift_step=200, length_step=50):
    """Return the visualization range after a keyboard command.

    The range ``[start, stop]`` is used to slice the match arrays, so
    ``start`` must not become negative (a negative slice start would count
    from the end of the array).

    Args:
        vis_range: current ``[start, stop]`` pair.
        key: ``'d'``/``'f'`` shift the whole range down/up by ``shift_step``;
            ``'c'``/``'v'`` shrink/grow ``stop`` by ``length_step``. Any other
            key leaves the range unchanged.
        shift_step: amount used by ``'d'`` and ``'f'``.
        length_step: amount used by ``'c'`` and ``'v'``.

    Returns:
        list: a new ``[start, stop]`` list; the input is not modified.
    """
    start, stop = vis_range
    if key == 'd':
        # Shift by at most `start` so the range stops at 0 instead of
        # wrapping into negative indices.
        shift = min(shift_step, max(start, 0))
        start -= shift
        stop -= shift
    elif key == 'f':
        start += shift_step
        stop += shift_step
    elif key == 'c':
        # Never shrink past an empty range.
        stop = max(stop - length_step, min(start, stop))
    elif key == 'v':
        stop += length_step
    return [start, stop]


def main():
    """Run the interactive LoFTR matching demo.

    Parses the command line, loads the LoFTR checkpoint onto the GPU, then
    matches every incoming frame against a reference frame. The result is
    shown in a window (with keyboard control) or, with ``--no_display``,
    written as PNG files to ``--output_dir``.

    Raises:
        ValueError: for an invalid ``--resize``, or when ``--no_display`` is
            given without ``--output_dir``.
        RuntimeError: when no CUDA device is available or the first frame
            cannot be read.
    """
    parser = build_parser()
    opt = parser.parse_args()
    print(front_matter)
    parser.print_help()

    opt.resize = normalize_resize(opt.resize)

    # Fail before loading the model rather than after matching the first pair.
    if opt.no_display and opt.output_dir is None:
        raise ValueError("output_dir is required when no display is given.")

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        raise RuntimeError("GPU is required to run this demo.")

    # Initialize LoFTR
    matcher = LoFTR(config=default_cfg)
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    matcher.load_state_dict(torch.load(opt.weight)['state_dict'])
    matcher = matcher.eval().to(device=device)

    window_name = 'LoFTR Matches'
    writer = None
    input_writer = None
    vs = None
    try:
        # Configure I/O
        if opt.save_video:
            print('Writing video to loftr-matches.mp4...')
            writer = cv2.VideoWriter(
                'loftr-matches.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 15, (640*2 + 10, 480))
        if opt.save_input:
            print('Writing video to demo-input.mp4...')
            input_writer = cv2.VideoWriter(
                'demo-input.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 15, (640, 480))

        vs = VideoStreamer(opt.input, opt.resize, opt.skip,
                           opt.image_glob, opt.max_length)
        frame, ret = vs.next_frame()
        if not ret:
            # Explicit raise: an `assert` would be stripped under `python -O`.
            raise RuntimeError('Error when reading the first frame (try different --input?)')

        frame_id = 0
        last_image_id = 0
        frame_tensor = frame2tensor(frame, device)
        # The first frame is the initial reference image (left side).
        last_data = {'image0': frame_tensor}
        last_frame = frame

        if opt.output_dir is not None:
            print('==> Will write outputs to {}'.format(opt.output_dir))
            Path(opt.output_dir).mkdir(parents=True, exist_ok=True)

        # Create a window to display the demo.
        if not opt.no_display:
            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
            cv2.resizeWindow(window_name, (640*2, 480))
        else:
            print('Skipping visualization, will not show a GUI.')

        # Print the keyboard help menu.
        print('==> Keyboard control:\n'
              '\tn: select the current frame as the reference image (left)\n'
              '\td/f: move the range of the matches (ranked by confidence) to visualize\n'
              '\tc/v: increase/decrease the length of the visualization range (i.e., total number of matches) to show\n'
              '\tq: quit')

        timer = AverageTimer()
        vis_range = [opt.bottom_k, opt.top_k]

        while True:
            frame_id += 1
            frame, ret = vs.next_frame()
            # Check for end of stream before touching `frame`.
            if not ret:
                print('Finished demo_loftr.py')
                break
            if frame_id % opt.skip_frames != 0:
                continue
            if opt.save_input:
                input_writer.write(cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB))
            timer.update('data')

            stem0, stem1 = last_image_id, vs.i - 1
            frame_tensor = frame2tensor(frame, device)
            last_data = {**last_data, 'image1': frame_tensor}
            # The matcher stores its outputs in `last_data`; the keys are read below.
            matcher(last_data)

            total_n_matches = len(last_data['mkpts0_f'])
            mkpts0 = last_data['mkpts0_f'].cpu().numpy()[vis_range[0]:vis_range[1]]
            mkpts1 = last_data['mkpts1_f'].cpu().numpy()[vis_range[0]:vis_range[1]]
            mconf = last_data['mconf'].cpu().numpy()[vis_range[0]:vis_range[1]]

            # Normalize confidence. The reported range falls back to 0 when no
            # match lies inside vis_range, so the overlay text can always be built.
            conf_min = conf_max = 0.0
            if len(mconf) > 0:
                conf_vis_min = 0.
                conf_min = mconf.min()
                conf_max = mconf.max()
                mconf = (mconf - conf_vis_min) / (conf_max - conf_vis_min + 1e-5)
            timer.update('forward')

            alpha = 0
            color = cm.jet(mconf, alpha=alpha)

            text = [
                'LoFTR',
                '# Matches (showing/total): {}/{}'.format(len(mkpts0), total_n_matches),
            ]
            small_text = [
                f'Showing matches from {vis_range[0]}:{vis_range[1]}',
                f'Confidence Range: {conf_min:.2f}:{conf_max:.2f}',
                'Image Pair: {:06}:{:06}'.format(stem0, stem1),
            ]
            out = make_matching_plot_fast(
                last_frame, frame, mkpts0, mkpts1, mkpts0, mkpts1, color, text,
                path=None, show_keypoints=False, small_text=small_text)

            # Record the visualization whether or not a window is shown.
            if opt.save_video:
                writer.write(out)

            if not opt.no_display:
                cv2.imshow(window_name, out)
                key = chr(cv2.waitKey(1) & 0xFF)
                if key == 'q':
                    print('Exiting...')
                    break
                elif key == 'n':
                    # Promote the current frame to the reference image.
                    last_data['image0'] = frame_tensor
                    last_frame = frame
                    last_image_id = (vs.i - 1)
                elif key in ('d', 'f'):
                    vis_range = adjust_vis_range(vis_range, key)
                    print(f'\nChanged the vis_range to {vis_range[0]}:{vis_range[1]}')
                elif key in ('c', 'v'):
                    vis_range = adjust_vis_range(vis_range, key)
                    print(f'\nChanged the vis_range[1] to {vis_range[1]}')
            else:
                # output_dir was validated above for the no-display mode.
                stem = 'matches_{:06}_{:06}'.format(stem0, stem1)
                out_file = str(Path(opt.output_dir, stem + '.png'))
                print('\nWriting image to {}'.format(out_file))
                cv2.imwrite(out_file, out)

            timer.update('viz')
            timer.print()
    finally:
        # Release resources on every exit path: 'q', end of stream, or an error.
        if writer is not None:
            writer.release()
        if input_writer is not None:
            input_writer.release()
        if not opt.no_display:
            cv2.destroyAllWindows()
        if vs is not None:
            vs.cleanup()


if __name__ == '__main__':
    main()
|