demo_roma_camera_position_async.py 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667
  1. #!/usr/bin/env python3
  2. """
  3. RoMa demo with asynchronous video streaming and camera position tracking.
  4. This script mirrors the input handling style used in the LightGlue demos:
  5. - webcam / IP camera / video file / image directory
  6. - UDP JPEG stream via udp://host:port (requires udp_jpeg_receiver.py)
  7. - async frame reader to reduce capture-side blocking
  8. """
  9. from pathlib import Path
  10. import argparse
  11. import queue
  12. import threading
  13. import time
  14. import os
  15. import sys
  16. import cv2
  17. import numpy as np
  18. import torch
  19. from PIL import Image
  20. from romatch import roma_outdoor, tiny_roma_v1_outdoor
  21. try:
  22. from udp_jpeg_receiver import UDPJPEGReceiver
  23. except ImportError:
  24. UDPJPEGReceiver = None
  25. torch.set_grad_enabled(False)
  26. class AverageTimer:
  27. """Class to help manage printing simple timing of code execution."""
  28. def __init__(self, smoothing=0.3, newline=False):
  29. self.smoothing = smoothing
  30. self.newline = newline
  31. self.times = {}
  32. self.will_print = {}
  33. self.reset()
  34. def reset(self):
  35. now = time.time()
  36. self.start = now
  37. self.last_time = now
  38. for name in self.will_print:
  39. self.will_print[name] = False
  40. def update(self, name="default"):
  41. now = time.time()
  42. dt = now - self.last_time
  43. if name in self.times:
  44. dt = self.smoothing * dt + (1 - self.smoothing) * self.times[name]
  45. self.times[name] = dt
  46. self.will_print[name] = True
  47. self.last_time = now
  48. def print(self, text="Timer"):
  49. total = 0.0
  50. print(f"[{text}]", end=" ")
  51. for key in self.times:
  52. val = self.times[key]
  53. if self.will_print[key]:
  54. print(f"{key}={val:.3f}", end=" ")
  55. total += val
  56. if total > 0:
  57. print(f"total={total:.3f} sec {{{1.0 / total:.1f} FPS}}", end=" ")
  58. else:
  59. print("total=0.000 sec {inf FPS}", end=" ")
  60. if self.newline:
  61. print(flush=True)
  62. else:
  63. print(end="\r", flush=True)
  64. self.reset()
  65. class VideoStreamer:
  66. """Class to help with reading images from a video stream."""
  67. def __init__(self, source, resize, skip, image_glob, max_length=1_000_000):
  68. self.source = source
  69. self.skip = skip
  70. self.max_length = max_length
  71. self.resize = resize
  72. self.i = 0
  73. self.cap = None
  74. self.is_ip_camera = False
  75. self.is_udp_jpeg = False
  76. self.udp_receiver = None
  77. self._is_digit_source = isinstance(source, int) or (
  78. isinstance(source, str) and source.isdigit()
  79. )
  80. if isinstance(source, str) and source.startswith("udp://"):
  81. if UDPJPEGReceiver is None:
  82. raise ImportError(
  83. "UDPJPEGReceiver not available. Make sure udp_jpeg_receiver.py exists."
  84. )
  85. parts = source.replace("udp://", "").split(":")
  86. if len(parts) == 2:
  87. host = parts[0] if parts[0] else "0.0.0.0"
  88. port = int(parts[1])
  89. else:
  90. host = "0.0.0.0"
  91. port = int(parts[0])
  92. self.is_udp_jpeg = True
  93. self.udp_receiver = UDPJPEGReceiver(host=host, port=port)
  94. self.udp_receiver.start()
  95. print(f"UDP JPEG receiver initialized: {host}:{port}")
  96. elif Path(source).is_dir():
  97. self.listing = []
  98. for ext in image_glob:
  99. self.listing.extend(list(Path(source).glob(ext)))
  100. self.listing = self.listing[: self.max_length]
  101. self.max_length = len(self.listing)
  102. if self.max_length == 0:
  103. raise IOError(f"No images found in directory: {source}")
  104. print(f"Found {self.max_length} images in {source}")
  105. elif Path(source).exists():
  106. self.cap = cv2.VideoCapture(source)
  107. else:
  108. if not self._is_digit_source and not Path(source).exists():
  109. self.is_ip_camera = True
  110. self.cap = cv2.VideoCapture(source, cv2.CAP_FFMPEG)
  111. self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
  112. else:
  113. self.cap = cv2.VideoCapture(int(source) if self._is_digit_source else source)
  114. def _resize_if_needed(self, frame):
  115. if len(self.resize) == 2:
  116. h, w = frame.shape[:2]
  117. if w != self.resize[0] or h != self.resize[1]:
  118. frame = cv2.resize(frame, tuple(self.resize))
  119. elif len(self.resize) == 1 and self.resize[0] > 0:
  120. h, w = frame.shape[:2]
  121. scale = self.resize[0] / max(h, w)
  122. new_w, new_h = int(w * scale), int(h * scale)
  123. frame = cv2.resize(frame, (new_w, new_h))
  124. return frame
  125. def next_frame(self):
  126. if self.is_udp_jpeg:
  127. frame = self.udp_receiver.get_image(timeout=0.1)
  128. if frame is None:
  129. return None, False
  130. return self._resize_if_needed(frame), True
  131. if self.cap is not None:
  132. if self.is_ip_camera:
  133. for _ in range(2):
  134. if not self.cap.grab():
  135. break
  136. ret, frame = self.cap.read()
  137. if not ret:
  138. return None, False
  139. frame = self._resize_if_needed(frame)
  140. for _ in range(self.skip):
  141. ret, _ = self.cap.read()
  142. if not ret:
  143. break
  144. return frame, True
  145. if self.i >= self.max_length:
  146. return None, False
  147. image_file = self.listing[self.i]
  148. frame = cv2.imread(str(image_file), cv2.IMREAD_COLOR)
  149. if frame is None:
  150. print(f"Failed to load image: {image_file}")
  151. return None, False
  152. self.i += 1
  153. return self._resize_if_needed(frame), True
  154. def cleanup(self):
  155. if self.is_udp_jpeg and self.udp_receiver is not None:
  156. self.udp_receiver.stop()
  157. if self.cap is not None:
  158. self.cap.release()
  159. class AsyncVideoStreamer:
  160. """Background frame reader that keeps only the latest frame."""
  161. def __init__(self, streamer: VideoStreamer, queue_size: int = 1, timeout: float = 1.0):
  162. self.streamer = streamer
  163. self.queue: "queue.Queue[np.ndarray]" = queue.Queue(maxsize=max(queue_size, 1))
  164. self.timeout = timeout
  165. self._stop_requested = False
  166. self._has_error = False
  167. self._thread = threading.Thread(target=self._reader, name="AsyncVideoStreamer", daemon=True)
  168. self._thread.start()
  169. def _reader(self):
  170. try:
  171. while not self._stop_requested:
  172. frame, ret = self.streamer.next_frame()
  173. if not ret:
  174. if hasattr(self.streamer, "is_udp_jpeg") and self.streamer.is_udp_jpeg:
  175. time.sleep(0.01)
  176. continue
  177. self._stop_requested = True
  178. break
  179. if self.queue.full():
  180. try:
  181. self.queue.get_nowait()
  182. except queue.Empty:
  183. pass
  184. self.queue.put(frame)
  185. except Exception as exc: # pylint: disable=broad-except
  186. self._has_error = True
  187. print(f"[AsyncVideoStreamer] Reader thread error: {exc}")
  188. finally:
  189. self._stop_requested = True
  190. def read(self):
  191. if self._has_error:
  192. return None, False
  193. try:
  194. frame = self.queue.get(timeout=self.timeout)
  195. return frame, True
  196. except queue.Empty:
  197. return None, False
  198. def stop(self):
  199. self._stop_requested = True
  200. if self._thread.is_alive():
  201. self._thread.join(timeout=1.0)
  202. self.streamer.cleanup()
  203. def maybe_resize(input_frame, resize_opt):
  204. if len(resize_opt) == 2:
  205. return cv2.resize(input_frame, tuple(resize_opt))
  206. if len(resize_opt) == 1 and resize_opt[0] > 0:
  207. h, w = input_frame.shape[:2]
  208. scale = resize_opt[0] / max(h, w)
  209. return cv2.resize(input_frame, (int(w * scale), int(h * scale)))
  210. return input_frame
  211. def apply_orientation(frame, opt):
  212. if opt.rotate == 90:
  213. frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
  214. elif opt.rotate == 180:
  215. frame = cv2.rotate(frame, cv2.ROTATE_180)
  216. elif opt.rotate == 270:
  217. frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
  218. if opt.flip_horizontal:
  219. frame = cv2.flip(frame, 1)
  220. if opt.flip_vertical:
  221. frame = cv2.flip(frame, 0)
  222. return frame
  223. def draw_camera_position_on_reference(
  224. reference_frame_bgr,
  225. camera_center_ref,
  226. has_valid_pose,
  227. num_matches=0,
  228. min_matches=30,
  229. inliers_ratio=0.0,
  230. trail_points=None,
  231. trail_thickness=2,
  232. status_text="",
  233. ):
  234. h_ref, w_ref = reference_frame_bgr.shape[:2]
  235. ref_colored = reference_frame_bgr.copy()
  236. center_ref = (w_ref // 2, h_ref // 2)
  237. cv2.circle(ref_colored, center_ref, 12, (0, 255, 0), 2)
  238. cv2.line(ref_colored, (center_ref[0] - 15, center_ref[1]), (center_ref[0] + 15, center_ref[1]), (0, 255, 0), 2)
  239. cv2.line(ref_colored, (center_ref[0], center_ref[1] - 15), (center_ref[0], center_ref[1] + 15), (0, 255, 0), 2)
  240. if (not has_valid_pose) or camera_center_ref is None or num_matches < min_matches:
  241. if not status_text:
  242. status_text = f"Pose unavailable. matches={num_matches}, inliers={inliers_ratio:.1%}"
  243. cv2.putText(ref_colored, status_text, (10, 30),
  244. cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
  245. return ref_colored
  246. cxy = (int(camera_center_ref[0]), int(camera_center_ref[1]))
  247. if trail_points is not None and len(trail_points) >= 2:
  248. for idx in range(1, len(trail_points)):
  249. p0 = (int(trail_points[idx - 1][0]), int(trail_points[idx - 1][1]))
  250. p1 = (int(trail_points[idx][0]), int(trail_points[idx][1]))
  251. cv2.line(ref_colored, p0, p1, (255, 255, 0), trail_thickness, cv2.LINE_AA)
  252. cv2.circle(ref_colored, cxy, 12, (0, 0, 255), 2)
  253. cv2.line(ref_colored, (cxy[0] - 15, cxy[1]), (cxy[0] + 15, cxy[1]), (0, 0, 255), 2)
  254. cv2.line(ref_colored, (cxy[0], cxy[1] - 15), (cxy[0], cxy[1] + 15), (0, 0, 255), 2)
  255. cv2.line(ref_colored, center_ref, cxy, (255, 0, 255), 2)
  256. cv2.putText(ref_colored, f"Matches: {num_matches}", (10, 30),
  257. cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
  258. cv2.putText(ref_colored, f"Inliers: {inliers_ratio:.1%}", (10, 60),
  259. cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
  260. return ref_colored
  261. def frame_bgr_to_pil_rgb(frame_bgr):
  262. return Image.fromarray(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
  263. def parse_args():
  264. parser = argparse.ArgumentParser(
  265. description="RoMa demo (asynchronous capture)",
  266. formatter_class=argparse.ArgumentDefaultsHelpFormatter,
  267. )
  268. parser.add_argument("--input", type=str, default="0", help="USB webcam index, IP camera URL, UDP stream (udp://host:port), or video path")
  269. parser.add_argument("--reference_image", type=str, default=None, help="Optional reference image path")
  270. parser.add_argument("--output_dir", type=str, default=None, help="Directory to save visualisations")
  271. parser.add_argument("--image_glob", type=str, nargs="+", default=["*.png", "*.jpg", "*.jpeg"], help="Glob for image sequences")
  272. parser.add_argument("--skip", type=int, default=0, help="Number of frames to skip between reads")
  273. parser.add_argument("--max_length", type=int, default=1_000_000, help="Maximum frames")
  274. parser.add_argument(
  275. "--resize",
  276. type=int,
  277. nargs="+",
  278. default=[640, 480],
  279. help="Resize input image. Two numbers = width height, one number = max dimension, -1 = no resize",
  280. )
  281. parser.add_argument("--model", type=str, default="tiny", choices=["tiny", "roma"], help="Model variant")
  282. parser.add_argument("--sample_num", type=int, default=2500, help="Number of sampled correspondences")
  283. parser.add_argument("--sample_thresh", type=float, default=0.05, help="Sampling certainty threshold")
  284. parser.add_argument("--ransac_reproj_threshold", type=float, default=3.0, help="RANSAC reprojection threshold in pixels")
  285. parser.add_argument("--min_matches", type=int, default=30, help="Minimum matches required to compute homography")
  286. parser.add_argument("--min_inlier_ratio", type=float, default=0.25, help="Minimum inlier ratio to accept homography")
  287. parser.add_argument("--smooth_alpha", type=float, default=0.8, help="EMA smoothing factor for camera position")
  288. parser.add_argument("--roma_interval", type=int, default=10, help="Run RoMa every N frames (higher N = faster)")
  289. parser.add_argument("--trail_len", type=int, default=80, help="Max number of points in trajectory")
  290. parser.add_argument("--trail_thickness", type=int, default=2, help="Trajectory line thickness")
  291. parser.add_argument("--queue_size", type=int, default=1, help="Frame queue size for async reader")
  292. parser.add_argument("--read_timeout", type=float, default=1.0, help="Seconds to wait for a frame from async reader")
  293. parser.add_argument("--flip_horizontal", action="store_true", help="Flip frames horizontally")
  294. parser.add_argument("--flip_vertical", action="store_true", help="Flip frames vertically")
  295. parser.add_argument("--rotate", type=int, default=0, choices=[0, 90, 180, 270], help="Rotate frames clockwise")
  296. parser.add_argument("--show_fps", action="store_true", help="Render FPS overlay")
  297. parser.add_argument("--max_fps", type=float, default=90.0, help="Cap processing FPS (<=0 disables cap)")
  298. parser.add_argument(
  299. "--timer_print_interval",
  300. type=float,
  301. default=0.5,
  302. help="Seconds between console timer prints (0 = every frame).",
  303. )
  304. parser.add_argument(
  305. "--max_display_fps",
  306. type=float,
  307. default=0.0,
  308. help="Cap rendering/display FPS only (0 = unlimited).",
  309. )
  310. parser.add_argument(
  311. "--idle_sleep_ms",
  312. type=float,
  313. default=2.0,
  314. help="Sleep time in milliseconds when a frame is skipped for rendering (<=0 disables).",
  315. )
  316. parser.add_argument("--force_cpu", action="store_true", help="Run inference on CPU even if CUDA is available")
  317. parser.add_argument("--no_ip_grab", action="store_true", help="Disable extra grab calls for IP cameras")
  318. parser.add_argument("--no_display", action="store_true", help="Disable OpenCV window")
  319. parser.add_argument("--no_ui", action="store_true", help="Suppress console output (UI embedding)")
  320. return parser.parse_args()
  321. def main():
  322. opt = parse_args()
  323. if len(opt.resize) == 2 and opt.resize[1] == -1:
  324. opt.resize = opt.resize[0:1]
  325. if len(opt.resize) == 2:
  326. print(f"Will resize to {opt.resize[0]}x{opt.resize[1]} (WxH)")
  327. elif len(opt.resize) == 1 and opt.resize[0] > 0:
  328. print(f"Will resize max dimension to {opt.resize[0]}")
  329. elif len(opt.resize) == 1:
  330. print("Will not resize images")
  331. else:
  332. raise ValueError("Cannot specify more than two integers for --resize")
  333. if opt.no_ui:
  334. sys.stdout = open(os.devnull, "w")
  335. sys.stderr = open(os.devnull, "w")
  336. device = "cuda" if torch.cuda.is_available() and not opt.force_cpu else "cpu"
  337. print(f'Running inference on device "{device}"')
  338. if opt.model == "tiny":
  339. roma_model = tiny_roma_v1_outdoor(device=torch.device(device))
  340. else:
  341. roma_model = roma_outdoor(device=torch.device(device))
  342. roma_model.sample_thresh = opt.sample_thresh
  343. if opt.reference_image is not None:
  344. print(f"==> Loading reference image: {opt.reference_image}")
  345. ref_frame = cv2.imread(opt.reference_image, cv2.IMREAD_COLOR)
  346. if ref_frame is None:
  347. raise IOError(f"Cannot load reference image: {opt.reference_image}")
  348. ref_frame = maybe_resize(ref_frame, opt.resize)
  349. ref_frame = apply_orientation(ref_frame, opt)
  350. else:
  351. ref_frame = None
  352. streamer = VideoStreamer(opt.input, opt.resize, opt.skip, opt.image_glob, opt.max_length)
  353. if hasattr(streamer, "is_udp_jpeg") and streamer.is_udp_jpeg:
  354. print("UDP JPEG mode: receiver started in background thread")
  355. elif hasattr(streamer, "cap") and streamer.cap is not None and opt.no_ip_grab and hasattr(streamer, "is_ip_camera"):
  356. streamer.is_ip_camera = False
  357. print("IP camera buffer flush disabled (no extra grab calls).")
  358. async_streamer = AsyncVideoStreamer(streamer, queue_size=opt.queue_size, timeout=opt.read_timeout)
  359. if ref_frame is None:
  360. first_frame, ret = async_streamer.read()
  361. if not ret:
  362. raise RuntimeError("Error when reading first frame.")
  363. ref_frame = apply_orientation(first_frame, opt)
  364. print("Using first frame as reference")
  365. if opt.output_dir is not None:
  366. Path(opt.output_dir).mkdir(exist_ok=True)
  367. print(f"==> Will write outputs to {opt.output_dir}")
  368. window_name_ref = "Camera Position in Reference"
  369. window_name_cam = "Live Camera"
  370. if not opt.no_display:
  371. try:
  372. cv2.namedWindow(window_name_ref, cv2.WINDOW_NORMAL)
  373. cv2.resizeWindow(window_name_ref, 640, 480)
  374. cv2.namedWindow(window_name_cam, cv2.WINDOW_NORMAL)
  375. cv2.resizeWindow(window_name_cam, 640, 480)
  376. except cv2.error as err:
  377. print(f"Warning: Could not create OpenCV windows: {err}")
  378. opt.no_display = True
  379. print("==> Keyboard control:\n\tn: set current frame as reference\n\tq: quit\n\tf: toggle FPS overlay\n")
  380. timer = AverageTimer(newline=True)
  381. show_fps = opt.show_fps
  382. fps_display = 0.0
  383. last_fps_print_time = time.time()
  384. fps_print_interval = 2.0
  385. last_timer_print_time = time.time()
  386. last_display_time = time.perf_counter()
  387. frame_id = 0
  388. # Global / fast-tracker state
  389. last_good_H_slow = None # from RoMa
  390. H_ref2cur_fast = None # fused pose used for drawing
  391. prev_gray = None
  392. prev_kpts = None
  393. orb = cv2.ORB_create(nfeatures=800)
  394. last_camera_center_ref = None
  395. trail_points = []
  396. try:
  397. while True:
  398. loop_start_time = time.time()
  399. frame_id += 1
  400. frame, ret = async_streamer.read()
  401. if not ret:
  402. if hasattr(streamer, "is_udp_jpeg") and streamer.is_udp_jpeg:
  403. continue
  404. print("Stream ended or timeout exceeded.")
  405. break
  406. frame = apply_orientation(frame, opt)
  407. timer.update("data")
  408. h_ref, w_ref = ref_frame.shape[:2]
  409. h_cur, w_cur = frame.shape[:2]
  410. H_ref_to_cur = None
  411. inliers_ratio = 0.0
  412. num_matches = 0
  413. status_text = ""
  414. # -------- Slow level: RoMa global alignment every roma_interval frames --------
  415. run_roma_this_frame = (frame_id % opt.roma_interval == 0) or (last_good_H_slow is None)
  416. if run_roma_this_frame:
  417. ref_pil = frame_bgr_to_pil_rgb(ref_frame)
  418. cur_pil = frame_bgr_to_pil_rgb(frame)
  419. if opt.model == "tiny":
  420. warp, certainty = roma_model.match(ref_pil, cur_pil)
  421. else:
  422. warp, certainty = roma_model.match(ref_pil, cur_pil, device=torch.device(device))
  423. matches, _certainty_samples = roma_model.sample(warp, certainty, num=opt.sample_num)
  424. k_ref, k_cur = roma_model.to_pixel_coordinates(matches, h_ref, w_ref, h_cur, w_cur)
  425. pts_ref = k_ref.detach().cpu().numpy().astype(np.float32)
  426. pts_cur = k_cur.detach().cpu().numpy().astype(np.float32)
  427. num_matches = len(pts_ref)
  428. if num_matches >= opt.min_matches:
  429. H_tmp, mask = cv2.findHomography(
  430. pts_ref, pts_cur, cv2.RANSAC, opt.ransac_reproj_threshold
  431. )
  432. if H_tmp is not None and mask is not None:
  433. inliers_ratio = float(mask.mean())
  434. if inliers_ratio >= opt.min_inlier_ratio:
  435. last_good_H_slow = H_tmp
  436. H_ref2cur_fast = H_tmp.copy()
  437. status_text = f"RoMa pose. matches={num_matches}, inliers={inliers_ratio:.1%}"
  438. else:
  439. status_text = (
  440. f"RoMa low inliers: {inliers_ratio:.1%} < {opt.min_inlier_ratio:.1%}"
  441. )
  442. else:
  443. status_text = "RoMa homography failed."
  444. else:
  445. status_text = f"RoMa insufficient matches: {num_matches}/{opt.min_matches}"
  446. # reset fast tracker state on each RoMa update
  447. prev_gray = None
  448. prev_kpts = None
  449. timer.update("forward")
  450. else:
  451. # -------- Fast level: ORB + LK between consecutive frames --------
  452. frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
  453. if prev_gray is None:
  454. prev_gray = frame_gray
  455. prev_kpts = None
  456. if prev_kpts is None or len(prev_kpts) < 100:
  457. kpts = orb.detect(prev_gray, None)
  458. kpts = sorted(kpts, key=lambda k: -k.response)[:500]
  459. prev_kpts = np.array([k.pt for k in kpts], dtype=np.float32) if kpts else None
  460. if prev_kpts is not None and len(prev_kpts) >= 20:
  461. next_pts, status, _err = cv2.calcOpticalFlowPyrLK(
  462. prev_gray, frame_gray, prev_kpts, None,
  463. winSize=(21, 21), maxLevel=3,
  464. criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 30, 0.01),
  465. )
  466. st = status.squeeze()
  467. good_prev = prev_kpts[st == 1]
  468. good_next = next_pts[st == 1]
  469. num_matches = len(good_prev)
  470. if num_matches >= 20:
  471. H_prev2cur_fast, mask = cv2.findHomography(
  472. good_prev, good_next, cv2.RANSAC, 3.0
  473. )
  474. if H_prev2cur_fast is not None and H_ref2cur_fast is not None:
  475. inliers_ratio = float(mask.mean()) if mask is not None else 0.0
  476. H_ref2cur_fast = H_prev2cur_fast @ H_ref2cur_fast
  477. status_text = f"Fast pose. matches={num_matches}, inliers={inliers_ratio:.1%}"
  478. else:
  479. status_text = "Fast homography failed."
  480. prev_kpts = good_next.reshape(-1, 2)
  481. prev_gray = frame_gray
  482. else:
  483. status_text = "Fast tracker: not enough keypoints."
  484. timer.update("forward")
  485. camera_center_current = (w_cur // 2, h_cur // 2)
  486. camera_center_ref = None
  487. has_valid_pose = False
  488. H_for_pose = H_ref2cur_fast if H_ref2cur_fast is not None else last_good_H_slow
  489. if H_for_pose is not None and num_matches >= opt.min_matches:
  490. try:
  491. H_cur_to_ref = np.linalg.inv(H_for_pose)
  492. center_cur = np.array([[camera_center_current]], dtype=np.float32)
  493. center_ref_now = cv2.perspectiveTransform(center_cur, H_cur_to_ref)[0, 0]
  494. center_ref_now = np.clip(center_ref_now, [0, 0], [w_ref - 1, h_ref - 1])
  495. if last_camera_center_ref is None:
  496. camera_center_ref = center_ref_now
  497. else:
  498. camera_center_ref = (
  499. opt.smooth_alpha * last_camera_center_ref
  500. + (1.0 - opt.smooth_alpha) * center_ref_now
  501. )
  502. last_camera_center_ref = camera_center_ref
  503. has_valid_pose = True
  504. trail_points.append(camera_center_ref.copy())
  505. if len(trail_points) > opt.trail_len:
  506. trail_points = trail_points[-opt.trail_len :]
  507. except np.linalg.LinAlgError:
  508. has_valid_pose = False
  509. should_render = True
  510. if opt.max_display_fps and opt.max_display_fps > 0:
  511. display_dt = 1.0 / float(opt.max_display_fps)
  512. if (time.perf_counter() - last_display_time) < display_dt:
  513. should_render = False
  514. if should_render:
  515. ref_view = draw_camera_position_on_reference(
  516. ref_frame,
  517. camera_center_ref,
  518. has_valid_pose,
  519. num_matches=num_matches,
  520. min_matches=opt.min_matches,
  521. inliers_ratio=inliers_ratio,
  522. trail_points=trail_points,
  523. trail_thickness=opt.trail_thickness,
  524. status_text=status_text,
  525. )
  526. if show_fps:
  527. cv2.putText(ref_view, f"FPS: {fps_display:.1f}", (10, 90),
  528. cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
  529. if not opt.no_display:
  530. cv2.imshow(window_name_ref, ref_view)
  531. # Show raw incoming frame for debugging / monitoring.
  532. # (frame is already oriented according to --rotate/flip)
  533. cv2.imshow(window_name_cam, frame)
  534. key = cv2.waitKey(1) & 0xFF
  535. else:
  536. key = 0
  537. last_display_time = time.perf_counter()
  538. else:
  539. if opt.idle_sleep_ms > 0:
  540. time.sleep(opt.idle_sleep_ms / 1000.0)
  541. key = 0
  542. now = time.time()
  543. if opt.timer_print_interval <= 0 or (now - last_timer_print_time) >= opt.timer_print_interval:
  544. timer.print("RoMa-Async")
  545. last_timer_print_time = now
  546. if key == ord("q"):
  547. print("Exiting via keyboard (q)")
  548. break
  549. if key == ord("n"):
  550. ref_frame = frame.copy()
  551. last_good_H_slow = None
  552. H_ref2cur_fast = None
  553. last_camera_center_ref = None
  554. trail_points = []
  555. print("Updated reference frame")
  556. elif key == ord("f"):
  557. show_fps = not show_fps
  558. if opt.output_dir is not None and should_render:
  559. out_file = Path(opt.output_dir, f"camera_pos_{frame_id:06d}.png")
  560. cv2.imwrite(str(out_file), ref_view)
  561. frame_id += 1
  562. loop_elapsed = time.time() - loop_start_time
  563. if opt.max_fps > 0:
  564. target_dt = 1.0 / opt.max_fps
  565. if loop_elapsed < target_dt:
  566. time.sleep(target_dt - loop_elapsed)
  567. loop_elapsed = target_dt
  568. dt = max(loop_elapsed, 1e-6)
  569. fps_display = 0.9 * fps_display + 0.1 * (1.0 / dt)
  570. now = time.time()
  571. if now - last_fps_print_time >= fps_print_interval:
  572. print(f"[FPS] {fps_display:.1f} FPS")
  573. last_fps_print_time = now
  574. finally:
  575. async_streamer.stop()
  576. try:
  577. cv2.destroyAllWindows()
  578. except Exception:
  579. pass
  580. if __name__ == "__main__":
  581. main()