yichael
/
xhs-note-crawling


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789
							"""
使用 LoFTR 在整张屏幕截图中匹配小模板图，得到几何关系（单应矩阵或备用点集）。
小模板 + 大图时 LoFTR 外点会破坏单应；局部 NCC 若以错误簇为中心会框错（如把刷新钮当成搜索图标）。
因此在全分辨率上做「多尺度全局 NCC」，优先取全图最高分作为最终框；LoFTR 仍用于几何备份。
"""

from __future__ import annotations

import math
import ssl
import sys
import urllib.request
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path

import cv2
import numpy as np

# Same source as the Kornia LoFTR "outdoor" weights; the checkpoint is
# downloaded from here when no local weight file exists.
LOFTR_OUTDOOR_WEIGHT_DOWNLOAD_URL = (
    "http://cmp.felk.cvut.cz/~mishkdmy/models/loftr_outdoor.ckpt"
)

# Directory one level above the directory that contains this file.
_REPOSITORY_ROOT_DIRECTORY = Path(__file__).resolve().parent.parent


@dataclass(frozen=True)
class TemplateMatchBoundingBoxAndCenterRoundedToIntegerScreenPixels:
    """Immutable match result expressed in whole screen pixels."""

    # Bounding box: left/top corner followed by its extent.
    bounding_box_left_integer: int
    bounding_box_top_integer: int
    bounding_box_width_integer: int
    bounding_box_height_integer: int
    # Center point of the match.
    center_screen_x_integer: int
    center_screen_y_integer: int

    def bounding_box_left_top_width_height_xywh_as_tuple_of_four_integers(
        self,
    ) -> tuple[int, int, int, int]:
        """Return the bounding box as ``(left, top, width, height)``."""
        left = self.bounding_box_left_integer
        top = self.bounding_box_top_integer
        width = self.bounding_box_width_integer
        height = self.bounding_box_height_integer
        return (left, top, width, height)


@dataclass(frozen=True)
class TemplateMatchBoundingBoxAndCenterInScreenshotPixels:
    """Immutable match result with a float ``(x, y, w, h)`` box and a float center."""

    bounding_box_left_top_width_height_xywh_float_tuple: tuple[float, float, float, float]
    center_screen_x_float: float
    center_screen_y_float: float

    def as_rounded_to_integer_screen_coordinates(
        self,
    ) -> TemplateMatchBoundingBoxAndCenterRoundedToIntegerScreenPixels:
        """Return a copy of this result with every value rounded via ``int(round(...))``."""
        left, top, width, height = (
            int(round(component))
            for component in self.bounding_box_left_top_width_height_xywh_float_tuple
        )
        center_x = int(round(self.center_screen_x_float))
        center_y = int(round(self.center_screen_y_float))
        return TemplateMatchBoundingBoxAndCenterRoundedToIntegerScreenPixels(
            bounding_box_left_integer=left,
            bounding_box_top_integer=top,
            bounding_box_width_integer=width,
            bounding_box_height_integer=height,
            center_screen_x_integer=center_x,
            center_screen_y_integer=center_y,
        )


@dataclass(frozen=True)
class LoFTRTemplateAgainstScreenshotMatch:
    """Intermediate LoFTR match data used to resolve the full-resolution box and center."""

    # Screenshot at its original resolution, BGR.
    screenshot_bgr_full_size: np.ndarray
    # 3x3 homography mapping template inference-resolution coordinates to
    # screenshot inference-resolution coordinates; None when judged unreliable.
    homography_template_inference_to_screenshot_inference: np.ndarray | None
    template_width_pixels_at_inference: int
    template_height_pixels_at_inference: int
    divisor_inference_screenshot_x_to_fullsize_x: float
    divisor_inference_screenshot_y_to_fullsize_y: float
    # High-confidence match points in screenshot inference coordinates; kept
    # for a fallback bounding box.
    high_confidence_match_points_on_screenshot_inference: np.ndarray
    template_original_width_pixels: int
    template_original_height_pixels: int
    # (x, y, width, height) in full-image coordinates when NCC refinement
    # succeeded; otherwise None.
    refined_template_bbox_xywh_full_size: tuple[float, float, float, float] | None
    # RANSAC inlier points at screenshot inference resolution; used for the
    # fallback rectangle and as the center of the refinement search.
    ransac_inlier_points_screenshot_inference: np.ndarray


class TemplateAgainstScreenshotMatcher(ABC):
    """Abstract interface for matchers that locate a template inside a screenshot."""

    @abstractmethod
    def match_template_center_in_screenshot(
        self,
        template_image_file_path: Path | str,
        screenshot_image_file_path: Path | str | None = None,
        *,
        screenshot_bgr_full_size_numpy: np.ndarray | None = None,
    ) -> TemplateMatchBoundingBoxAndCenterInScreenshotPixels:
        """Return the template's bounding box and center within the screenshot.

        The screenshot is supplied either as a file path or as an in-memory
        array (presumably BGR, judging by the parameter name); how the two are
        prioritised is up to the concrete subclass.
        """


def _template_match_result_center_xy_in_screenshot_pixels(
    match: LoFTRTemplateAgainstScreenshotMatch,
) -> tuple[float, float]:
    refined_bbox_xywh = match.refined_template_bbox_xywh_full_size
    if refined_bbox_xywh is not None:
        bbox_x, bbox_y, bbox_width, bbox_height = refined_bbox_xywh
        return (
            bbox_x + bbox_width / 2.0,
            bbox_y + bbox_height / 2.0,
        )
    homography_matrix = match.homography_template_inference_to_screenshot_inference
    if homography_matrix is not None:
        template_width_inference = match.template_width_pixels_at_inference
        template_height_inference = match.template_height_pixels_at_inference
        corners_template_inference_xy = np.array(
            [
                [0, 0],
                [template_width_inference - 1, 0],
                [template_width_inference - 1, template_height_inference - 1],
                [0, template_height_inference - 1],
            ],
            dtype=np.float32,
        ).reshape(1, 4, 2)
        corners_screenshot_inference_xy = cv2.perspectiveTransform(
            corners_template_inference_xy, homography_matrix
        )[0]
        divisor_x = match.divisor_inference_screenshot_x_to_fullsize_x
        divisor_y = match.divisor_inference_screenshot_y_to_fullsize_y
        corners_screenshot_full_xy = corners_screenshot_inference_xy.copy()
        corners_screenshot_full_xy[:, 0] /= divisor_x
        corners_screenshot_full_xy[:, 1] /= divisor_y
        mean_corner_xy = np.mean(corners_screenshot_full_xy, axis=0)
        return (float(mean_corner_xy[0]), float(mean_corner_xy[1]))
    inlier_points_inference_xy = match.ransac_inlier_points_screenshot_inference
    median_inference_xy = np.median(inlier_points_inference_xy, axis=0)
    divisor_x = match.divisor_inference_screenshot_x_to_fullsize_x
    divisor_y = match.divisor_inference_screenshot_y_to_fullsize_y
    return (
        float(median_inference_xy[0] / divisor_x),
        float(median_inference_xy[1] / divisor_y),
    )


def _template_match_bounding_box_xywh_full_size_float_tuple_from_loftr_template_against_screenshot_match_object(
    match: LoFTRTemplateAgainstScreenshotMatch,
) -> tuple[float, float, float, float]:
    refined_bbox_xywh = match.refined_template_bbox_xywh_full_size
    if refined_bbox_xywh is not None:
        return refined_bbox_xywh
    homography_matrix = match.homography_template_inference_to_screenshot_inference
    if homography_matrix is not None:
        template_width_inference = match.template_width_pixels_at_inference
        template_height_inference = match.template_height_pixels_at_inference
        corners_template_inference_xy = np.array(
            [
                [0, 0],
                [template_width_inference - 1, 0],
                [
                    template_width_inference - 1,
                    template_height_inference - 1,
                ],
                [0, template_height_inference - 1],
            ],
            dtype=np.float32,
        ).reshape(1, 4, 2)
        corners_screenshot_inference_xy = cv2.perspectiveTransform(
            corners_template_inference_xy,
            homography_matrix,
        )[0]
        divisor_x = match.divisor_inference_screenshot_x_to_fullsize_x
        divisor_y = match.divisor_inference_screenshot_y_to_fullsize_y
        corners_screenshot_full_xy = corners_screenshot_inference_xy.copy()
        corners_screenshot_full_xy[:, 0] /= divisor_x
        corners_screenshot_full_xy[:, 1] /= divisor_y
        min_x = float(np.min(corners_screenshot_full_xy[:, 0]))
        min_y = float(np.min(corners_screenshot_full_xy[:, 1]))
        max_x = float(np.max(corners_screenshot_full_xy[:, 0]))
        max_y = float(np.max(corners_screenshot_full_xy[:, 1]))
        return (min_x, min_y, max_x - min_x, max_y - min_y)
    inlier_points_inference_xy = match.ransac_inlier_points_screenshot_inference
    divisor_x = match.divisor_inference_screenshot_x_to_fullsize_x
    divisor_y = match.divisor_inference_screenshot_y_to_fullsize_y
    inlier_full_xy = inlier_points_inference_xy.astype(np.float64).copy()
    inlier_full_xy[:, 0] /= divisor_x
    inlier_full_xy[:, 1] /= divisor_y
    min_x = float(np.min(inlier_full_xy[:, 0]))
    min_y = float(np.min(inlier_full_xy[:, 1]))
    max_x = float(np.max(inlier_full_xy[:, 0]))
    max_y = float(np.max(inlier_full_xy[:, 1]))
    return (min_x, min_y, max_x - min_x, max_y - min_y)


def _template_match_bounding_box_and_center_in_screenshot_pixels_from_loftr_template_against_screenshot_match_object(
    match: LoFTRTemplateAgainstScreenshotMatch,
) -> TemplateMatchBoundingBoxAndCenterInScreenshotPixels:
    """Bundle the full-size bounding box and center of ``match`` into one result object."""
    # The box is resolved first, then the center, each by its own helper.
    box_xywh = _template_match_bounding_box_xywh_full_size_float_tuple_from_loftr_template_against_screenshot_match_object(
        match,
    )
    center_x, center_y = _template_match_result_center_xy_in_screenshot_pixels(match)
    return TemplateMatchBoundingBoxAndCenterInScreenshotPixels(
        bounding_box_left_top_width_height_xywh_float_tuple=box_xywh,
        center_screen_x_float=center_x,
        center_screen_y_float=center_y,
    )


class LoFTRTemplateAgainstScreenshotMatcher(TemplateAgainstScreenshotMatcher):
    """Matcher that stores LoFTR settings and forwards to ``run_loftr_template_match``."""

    def __init__(
        self,
        loftr_repository_directory: Path,
        loftr_weight_checkpoint_file_path: Path,
        *,
        template_long_edge_max_pixels: int = 640,
        screenshot_long_edge_max_pixels: int = 1280,
        ransac_reprojection_threshold: float = 3.0,
        max_matches_for_homography: int = 800,
    ) -> None:
        """Remember the LoFTR source directory, checkpoint path and tuning knobs."""
        # Locations of the LoFTR code and weights.
        self._loftr_repository_directory = loftr_repository_directory
        self._loftr_weight_checkpoint_file_path = loftr_weight_checkpoint_file_path
        # Resize limits applied before inference.
        self._template_long_edge_max_pixels = template_long_edge_max_pixels
        self._screenshot_long_edge_max_pixels = screenshot_long_edge_max_pixels
        # Homography estimation settings.
        self._ransac_reprojection_threshold = ransac_reprojection_threshold
        self._max_matches_for_homography = max_matches_for_homography

    def match_template_center_in_screenshot(
        self,
        template_image_file_path: Path | str,
        screenshot_image_file_path: Path | str | None = None,
        *,
        screenshot_bgr_full_size_numpy: np.ndarray | None = None,
    ) -> TemplateMatchBoundingBoxAndCenterInScreenshotPixels:
        """Run the LoFTR + NCC match with the stored settings and return its result.

        Both screenshot arguments are passed through; the path is converted to
        ``Path`` only when it is not ``None``.
        """
        screenshot_path_for_loftr: Path | None = (
            None
            if screenshot_image_file_path is None
            else Path(screenshot_image_file_path)
        )
        return run_loftr_template_match(
            loftr_repository_directory=self._loftr_repository_directory,
            loftr_weight_checkpoint_path=self._loftr_weight_checkpoint_file_path,
            screenshot_image_path=screenshot_path_for_loftr,
            screenshot_bgr_full_size_numpy=screenshot_bgr_full_size_numpy,
            template_image_path=Path(template_image_file_path),
            template_long_edge_max_pixels=self._template_long_edge_max_pixels,
            screenshot_long_edge_max_pixels=self._screenshot_long_edge_max_pixels,
            ransac_reprojection_threshold=self._ransac_reprojection_threshold,
            max_matches_for_homography=self._max_matches_for_homography,
        )


def match_template_center_xy_for_screenshot_file_and_template_file(
    source_screen_screenshot_image_file_path: Path | str,
    template_image_file_path: Path | str,
) -> TemplateMatchBoundingBoxAndCenterInScreenshotPixels:
    """
    Match ``template_image_file_path`` inside the full image at
    ``source_screen_screenshot_image_file_path`` (e.g. a full-screen PNG) using LoFTR + NCC.

    Returns the full-resolution bounding box ``(x, y, width, height)`` and the template
    center ``(x, y)``. The LoFTR directory and weights use the repository default paths.
    """
    default_loftr_directory = _REPOSITORY_ROOT_DIRECTORY / "python" / "LoFTR"
    default_checkpoint_path = default_loftr_directory / "weights" / "loftr_outdoor.ckpt"
    matcher = LoFTRTemplateAgainstScreenshotMatcher(
        default_loftr_directory,
        default_checkpoint_path,
    )
    screenshot_path = Path(source_screen_screenshot_image_file_path)
    return matcher.match_template_center_in_screenshot(
        template_image_file_path,
        screenshot_path,
    )


def _download_file_with_ssl_fallbacks(download_url: str, destination_file_path: Path) -> None:
    destination_file_path.parent.mkdir(parents=True, exist_ok=True)
    ssl_context_candidates: list[ssl.SSLContext] = []
    try:
        import certifi

        ssl_context_candidates.append(
            ssl.create_default_context(cafile=certifi.where())
        )
    except Exception:
        pass
    ssl_context_candidates.append(ssl.create_default_context())
    ssl_context_candidates.append(ssl._create_unverified_context())

    last_error: BaseException | None = None
    for ssl_context in ssl_context_candidates:
        try:
            with urllib.request.urlopen(
                download_url, context=ssl_context, timeout=300
            ) as response:
                destination_file_path.write_bytes(response.read())
            return
        except Exception as exc:
            last_error = exc
    raise RuntimeError(
        f"无法下载 LoFTR 权重：{download_url}\n请手动保存到：{destination_file_path}"
    ) from last_error


def _resize_grayscale_divisible_by_eight(
    grayscale_image: np.ndarray,
    longest_edge_max_pixels: int,
) -> tuple[np.ndarray, float, float]:
    """Resize so the long edge fits the limit and both edges are multiples of 8.

    Returns ``(resized_image, divisor_x, divisor_y)`` where
    full-size coordinate = inference coordinate / divisor.
    """
    source_height, source_width = grayscale_image.shape[:2]
    longest_edge = max(source_height, source_width)
    if longest_edge > longest_edge_max_pixels:
        # Shrink uniformly so the longer edge lands on the limit.
        factor = longest_edge_max_pixels / longest_edge
        scaled_width = int(round(source_width * factor))
        scaled_height = int(round(source_height * factor))
    else:
        scaled_width, scaled_height = source_width, source_height

    # Round each edge down to a multiple of 8, never going below 8.
    output_width = max(scaled_width - scaled_width % 8, 8)
    output_height = max(scaled_height - scaled_height % 8, 8)
    resized_image = cv2.resize(
        grayscale_image,
        (output_width, output_height),
        interpolation=cv2.INTER_AREA,
    )
    return (
        resized_image,
        output_width / source_width,
        output_height / source_height,
    )


def _filter_matches_near_confident_median(
    template_points: np.ndarray,
    screenshot_points: np.ndarray,
    confidence: np.ndarray,
    template_width_inf: int,
    template_height_inf: int,
    min_points: int = 12,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """保留置信度高且在几何中位数附近成簇的匹配，抑制随机外点。"""
    order = np.argsort(-confidence)
    top_n = min(200, len(order))
    sp = screenshot_points[order[:top_n]]
    tp = template_points[order[:top_n]]
    cf = confidence[order[:top_n]]

    median_screen = np.median(sp[: min(40, len(sp))], axis=0)
    diag = float(np.hypot(template_width_inf, template_height_inf))
    radius = max(diag * 2.5, 48.0)

    for _ in range(8):
        dist = np.linalg.norm(sp - median_screen, axis=1)
        keep = dist < radius
        if int(np.sum(keep)) >= min_points:
            return tp[keep], sp[keep], cf[keep]
        radius *= 1.35
        median_screen = np.median(sp[keep] if np.any(keep) else sp, axis=0)

    return tp, sp, cf


def _homography_quad_plausible_on_full_image(
    corners_full_xy: np.ndarray,
    image_width: int,
    image_height: int,
    template_orig_w: int,
    template_orig_h: int,
) -> bool:
    """检查单应投影四边形是否落在图内且尺度/长宽比与模板大致一致。"""
    if corners_full_xy.shape != (4, 2) or not np.all(np.isfinite(corners_full_xy)):
        return False
    margin_x = 0.08 * image_width
    margin_y = 0.08 * image_height
    xs, ys = corners_full_xy[:, 0], corners_full_xy[:, 1]
    if xs.min() < -margin_x or xs.max() > image_width + margin_x:
        return False
    if ys.min() < -margin_y or ys.max() > image_height + margin_y:
        return False

    bbox_w = float(xs.max() - xs.min())
    bbox_h = float(ys.max() - ys.min())
    if bbox_w < 4 or bbox_h < 4:
        return False

    tpl_ar = template_orig_w / max(template_orig_h, 1)
    box_ar = bbox_w / max(bbox_h, 1e-6)
    ratio = box_ar / max(tpl_ar, 1e-6)
    if ratio > 3.5 or ratio < (1.0 / 3.5):
        return False

    area = float(cv2.contourArea(corners_full_xy.astype(np.float32)))
    expected = float(template_orig_w * template_orig_h)
    if area < 0.12 * expected or area > 30.0 * expected:
        return False
    return True


def _ncc_global_multiscale_best_match(
    screenshot_gray_full: np.ndarray,
    template_gray_full: np.ndarray,
    template_scale_factors: tuple[float, ...] = (
        0.88,
        0.92,
        0.96,
        1.0,
        1.04,
        1.08,
        1.12,
    ),
    minimum_acceptable_score: float = 0.34,
) -> tuple[tuple[float, float, float, float], float] | None:
    """
    在全图上对多种缩放的模板做 TM_CCOEFF_NORMED，取全局最大响应。
    适用于 UI 小图标与截图同尺度、需避免「只在 LoFTR 簇附近找」而找错位置的情况。
    返回 ((x, y, w, h), 最佳分数)；分数低于阈值则视为不可靠。
    """
    h_img, w_img = screenshot_gray_full.shape[:2]
    h_tpl0, w_tpl0 = template_gray_full.shape[:2]
    if h_tpl0 >= h_img or w_tpl0 >= w_img:
        return None

    best_score = -1.0
    best_bbox_xywh: tuple[float, float, float, float] | None = None

    for scale in template_scale_factors:
        tw = max(3, int(round(w_tpl0 * scale)))
        th = max(3, int(round(h_tpl0 * scale)))
        if tw >= w_img or th >= h_img:
            continue
        template_scaled = cv2.resize(
            template_gray_full, (tw, th), interpolation=cv2.INTER_AREA
        )
        response_map = cv2.matchTemplate(
            screenshot_gray_full, template_scaled, cv2.TM_CCOEFF_NORMED
        )
        _, max_val, _, max_loc = cv2.minMaxLoc(response_map)
        if max_val > best_score:
            best_score = float(max_val)
            best_bbox_xywh = (
                float(max_loc[0]),
                float(max_loc[1]),
                float(tw),
                float(th),
            )

    if best_bbox_xywh is None or best_score < minimum_acceptable_score:
        return None
    return best_bbox_xywh, best_score


def list_template_match_centers_ncc_multiscale(
    template_image_file_path: Path | str,
    *,
    screenshot_image_file_path: Path | str | None = None,
    screenshot_bgr_numpy: np.ndarray | None = None,
    template_scale_factors: tuple[float, ...] = (
        0.88,
        0.92,
        0.96,
        1.0,
        1.04,
        1.08,
        1.12,
    ),
    min_score: float = 0.28,
    max_peaks_per_scale: int = 8,
    dedupe_distance_pixels: float = 28.0,
) -> list[tuple[float, float]]:
    """
    Collect template-center candidates from a whole-image multi-scale ``matchTemplate``.

    For each scale, up to ``max_peaks_per_scale`` response peaks scoring at
    least ``min_score`` are taken, blanking a neighbourhood around each peak
    before looking for the next. Candidates from all scales are then visited
    from highest to lowest score, and any candidate closer than
    ``dedupe_distance_pixels`` to an already accepted center is dropped.

    Returns the accepted centers in full-resolution coordinates, best first;
    per the original note they are meant for picking the match nearest to an
    OCR anchor. Returns an empty list when the template is not strictly
    smaller than the screenshot.

    Raises:
        FileNotFoundError: When the screenshot or the template cannot be read.
    """
    screenshot_bgr = (
        screenshot_bgr_numpy
        if screenshot_bgr_numpy is not None
        else cv2.imread(str(screenshot_image_file_path), cv2.IMREAD_COLOR)
    )
    template_gray = cv2.imread(str(template_image_file_path), cv2.IMREAD_GRAYSCALE)
    if screenshot_bgr is None or template_gray is None:
        raise FileNotFoundError(
            f"无法读取截图或模板：screenshot_image_file_path={screenshot_image_file_path!s} template_image_file_path={template_image_file_path!s}"
        )
    screenshot_gray = cv2.cvtColor(screenshot_bgr, cv2.COLOR_BGR2GRAY)
    image_height, image_width = screenshot_gray.shape[:2]
    base_height, base_width = template_gray.shape[:2]
    if base_height >= image_height or base_width >= image_width:
        return []

    # (center_x, center_y, score) for every peak found at any scale.
    scored_centers: list[tuple[float, float, float]] = []
    for factor in template_scale_factors:
        scaled_width = max(3, int(round(base_width * factor)))
        scaled_height = max(3, int(round(base_height * factor)))
        if scaled_width >= image_width or scaled_height >= image_height:
            continue
        scaled_template = cv2.resize(
            template_gray,
            (scaled_width, scaled_height),
            interpolation=cv2.INTER_AREA,
        )
        scores = cv2.matchTemplate(
            screenshot_gray, scaled_template, cv2.TM_CCOEFF_NORMED
        )
        suppression_radius = int(max(4, max(scaled_width, scaled_height) * 0.55))
        remaining = scores.copy()
        for _attempt in range(max_peaks_per_scale):
            _, peak_score, _, peak_location = cv2.minMaxLoc(remaining)
            if peak_score < min_score:
                break
            peak_x, peak_y = peak_location
            scored_centers.append(
                (
                    float(peak_x) + float(scaled_width) / 2.0,
                    float(peak_y) + float(scaled_height) / 2.0,
                    float(peak_score),
                )
            )
            # Blank the square around this peak so the next search finds a
            # different location.
            left = max(0, peak_x - suppression_radius)
            top = max(0, peak_y - suppression_radius)
            right = min(remaining.shape[1], peak_x + suppression_radius + 1)
            bottom = min(remaining.shape[0], peak_y + suppression_radius + 1)
            remaining[top:bottom, left:right] = -1.0

    scored_centers.sort(key=lambda candidate: -candidate[2])
    accepted: list[tuple[float, float]] = []
    for center_x, center_y, _score in scored_centers:
        too_close = any(
            math.hypot(center_x - kept_x, center_y - kept_y) < dedupe_distance_pixels
            for kept_x, kept_y in accepted
        )
        if not too_close:
            accepted.append((center_x, center_y))
    return accepted


def _ncc_refine_template_bbox(
    screenshot_gray_full: np.ndarray,
    template_gray_full: np.ndarray,
    center_x_full: float,
    center_y_full: float,
    search_margin_full: float,
    min_ncc_score: float = 0.25,
) -> tuple[float, float, float, float] | None:
    """在以 LoFTR 粗中心为邻域内做归一化互相关，返回最佳 (x, y, w, h)。"""
    h_img, w_img = screenshot_gray_full.shape[:2]
    h_t, w_t = template_gray_full.shape[:2]
    if h_t >= h_img or w_t >= w_img:
        return None

    half = int(max(search_margin_full, max(w_t, h_t) * 2))
    x0 = int(np.clip(center_x_full - half, 0, w_img - 1))
    y0 = int(np.clip(center_y_full - half, 0, h_img - 1))
    x1 = int(np.clip(center_x_full + half, 0, w_img))
    y1 = int(np.clip(center_y_full + half, 0, h_img))
    if x1 - x0 <= w_t or y1 - y0 <= h_t:
        return None

    roi = screenshot_gray_full[y0:y1, x0:x1]
    result = cv2.matchTemplate(roi, template_gray_full, cv2.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv2.minMaxLoc(result)
    if max_val < min_ncc_score:
        return None

    x = float(x0 + max_loc[0])
    y = float(y0 + max_loc[1])
    return (x, y, float(w_t), float(h_t))


def run_loftr_template_match(
    *,
    loftr_repository_directory: Path,
    loftr_weight_checkpoint_path: Path,
    screenshot_image_path: Path | None = None,
    screenshot_bgr_full_size_numpy: np.ndarray | None = None,
    template_image_path: Path,
    template_long_edge_max_pixels: int = 640,
    screenshot_long_edge_max_pixels: int = 1280,
    ransac_reprojection_threshold: float = 3.0,
    max_matches_for_homography: int = 800,
) -> TemplateMatchBoundingBoxAndCenterInScreenshotPixels:
    """
    Match a template image against a screenshot with LoFTR, then refine with NCC.

    Steps: load both images, resize them to inference resolution, run LoFTR,
    filter outlier matches, estimate a homography with RANSAC (discarded when
    the projected quad is implausible), and finally locate the template on the
    full-resolution screenshot with NCC. The global multi-scale NCC result is
    preferred; the local NCC around the LoFTR center is used only when the
    global search fails, followed by a relaxed global search. Nothing is
    written to disk except a weight download when the checkpoint is missing.

    Args:
        loftr_repository_directory: Directory containing the LoFTR sources; it
            is inserted into ``sys.path`` so ``src.loftr`` can be imported.
        loftr_weight_checkpoint_path: Checkpoint file; downloaded when absent.
        screenshot_image_path: Screenshot file, read only when
            ``screenshot_bgr_full_size_numpy`` is ``None``.
        screenshot_bgr_full_size_numpy: In-memory screenshot; it is converted
            with ``COLOR_BGR2GRAY``, so a BGR array is expected.
        template_image_path: Template image file (read as grayscale).
        template_long_edge_max_pixels: Long-edge limit for the template at inference.
        screenshot_long_edge_max_pixels: Long-edge limit for the screenshot at inference.
        ransac_reprojection_threshold: Reprojection threshold passed to ``cv2.findHomography``.
        max_matches_for_homography: Maximum number of matches fed to RANSAC.

    Returns:
        The full-resolution bounding box and center of the template.

    Raises:
        FileNotFoundError: When the LoFTR directory is missing or either image
            cannot be read.
    """
    if not loftr_repository_directory.is_dir():
        raise FileNotFoundError(f"未找到 LoFTR 源码目录：{loftr_repository_directory}")

    # Make the LoFTR checkout importable as the top-level package ``src``.
    repository_path_string = str(loftr_repository_directory)
    if repository_path_string not in sys.path:
        sys.path.insert(0, repository_path_string)

    import torch  # noqa: E402

    from src.loftr import LoFTR, default_cfg  # noqa: E402

    if not loftr_weight_checkpoint_path.is_file():
        _download_file_with_ssl_fallbacks(
            LOFTR_OUTDOOR_WEIGHT_DOWNLOAD_URL, loftr_weight_checkpoint_path
        )

    if screenshot_bgr_full_size_numpy is not None:
        screenshot_bgr_full_size = screenshot_bgr_full_size_numpy
    else:
        screenshot_bgr_full_size = cv2.imread(
            str(screenshot_image_path), cv2.IMREAD_COLOR
        )
    template_grayscale = cv2.imread(
        str(template_image_path), cv2.IMREAD_GRAYSCALE
    )
    if screenshot_bgr_full_size is None or template_grayscale is None:
        raise FileNotFoundError(
            f"无法读取图片：screenshot_image_path={screenshot_image_path!s} template_image_path={template_image_path!s}"
        )

    template_orig_h, template_orig_w = template_grayscale.shape[:2]
    screenshot_grayscale = cv2.cvtColor(
        screenshot_bgr_full_size, cv2.COLOR_BGR2GRAY
    )
    full_h, full_w = screenshot_grayscale.shape[:2]

    # Inference-resolution copies; only the screenshot divisors are needed to
    # map results back to full-size coordinates.
    template_at_inference, _, _ = _resize_grayscale_divisible_by_eight(
        template_grayscale, template_long_edge_max_pixels
    )
    screenshot_at_inference, divisor_screen_x, divisor_screen_y = (
        _resize_grayscale_divisible_by_eight(
            screenshot_grayscale, screenshot_long_edge_max_pixels
        )
    )

    template_height_at_inference, template_width_at_inference = (
        template_at_inference.shape[:2]
    )

    compute_device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu"
    )
    # Shape (1, 1, H, W), values scaled to [0, 1].
    template_batch = (
        torch.from_numpy(template_at_inference).float()[None, None].to(compute_device)
        / 255.0
    )
    screenshot_batch = (
        torch.from_numpy(screenshot_at_inference).float()[None, None].to(compute_device)
        / 255.0
    )

    matcher_network = LoFTR(config=default_cfg)
    # SECURITY NOTE(review): torch.load unpickles the checkpoint, and the file
    # may have been fetched from LOFTR_OUTDOOR_WEIGHT_DOWNLOAD_URL; only use
    # checkpoints from a trusted source.
    checkpoint = torch.load(
        str(loftr_weight_checkpoint_path), map_location=compute_device
    )
    matcher_network.load_state_dict(checkpoint["state_dict"])
    matcher_network = matcher_network.eval().to(compute_device)

    # LoFTR writes its outputs into the batch dict. Gradients are disabled only
    # for this forward pass so the caller's global autograd state is untouched.
    forward_batch = {"image0": template_batch, "image1": screenshot_batch}
    with torch.no_grad():
        matcher_network(forward_batch)

    template_match_points = forward_batch["mkpts0_f"].detach().cpu().numpy()
    screenshot_match_points = forward_batch["mkpts1_f"].detach().cpu().numpy()
    match_confidence = forward_batch["mconf"].detach().cpu().numpy()

    trusted_homography: np.ndarray | None
    screenshot_points_for_ransac: np.ndarray
    inlier_screen: np.ndarray

    if len(template_match_points) < 4:
        # Too few matches for a homography: use the screenshot center as the
        # only "inlier" so the later steps still have a search center.
        inference_screenshot_height, inference_screenshot_width = (
            screenshot_at_inference.shape[:2]
        )
        inlier_screen = np.array(
            [
                [
                    float(inference_screenshot_width) * 0.5,
                    float(inference_screenshot_height) * 0.5,
                ]
            ],
            dtype=np.float32,
        )
        screenshot_points_for_ransac = inlier_screen.copy()
        trusted_homography = None
    else:
        tp_filt, sp_filt, cf_filt = _filter_matches_near_confident_median(
            template_match_points,
            screenshot_match_points,
            match_confidence,
            template_width_at_inference,
            template_height_at_inference,
        )

        # Cap the number of matches given to RANSAC, most confident first.
        confidence_sorted_indices = np.argsort(-cf_filt)[
            : min(max_matches_for_homography, len(cf_filt))
        ]
        template_points_for_ransac = tp_filt[confidence_sorted_indices].astype(
            np.float32
        )
        screenshot_points_for_ransac = sp_filt[confidence_sorted_indices].astype(
            np.float32
        )

        homography_matrix, homography_mask = cv2.findHomography(
            template_points_for_ransac,
            screenshot_points_for_ransac,
            cv2.RANSAC,
            ransac_reprojection_threshold,
            maxIters=5000,
            confidence=0.995,
        )

        if homography_mask is not None:
            inlier_flat = homography_mask.ravel().astype(bool)
            inlier_screen = screenshot_points_for_ransac[inlier_flat]
        else:
            inlier_screen = screenshot_points_for_ransac

        # With too few inliers, fall back to all filtered screenshot points.
        if len(inlier_screen) < 4:
            inlier_screen = sp_filt

        trusted_homography = homography_matrix
        if homography_matrix is not None:
            corners_tpl = np.array(
                [
                    [0, 0],
                    [template_width_at_inference - 1, 0],
                    [
                        template_width_at_inference - 1,
                        template_height_at_inference - 1,
                    ],
                    [0, template_height_at_inference - 1],
                ],
                dtype=np.float32,
            ).reshape(1, 4, 2)
            corners_inf = cv2.perspectiveTransform(corners_tpl, homography_matrix)[0]
            corners_full = corners_inf.copy()
            corners_full[:, 0] /= divisor_screen_x
            corners_full[:, 1] /= divisor_screen_y
            # Reject homographies whose projected quad is implausible.
            if not _homography_quad_plausible_on_full_image(
                corners_full, full_w, full_h, template_orig_w, template_orig_h
            ):
                trusted_homography = None

    refined_bbox: tuple[float, float, float, float] | None
    global_ncc = _ncc_global_multiscale_best_match(
        screenshot_grayscale, template_grayscale
    )
    if global_ncc is not None:
        # The whole-image best score wins; the local search is not needed.
        refined_bbox = global_ncc[0]
    else:
        # Local NCC around the median inlier location, in full-size pixels.
        center_inf = np.median(inlier_screen, axis=0)
        center_full_x = float(center_inf[0] / divisor_screen_x)
        center_full_y = float(center_inf[1] / divisor_screen_y)
        search_margin = float(
            max(
                template_orig_w,
                template_orig_h,
                template_width_at_inference / divisor_screen_x,
            )
            * 2.5
        )
        refined_bbox = _ncc_refine_template_bbox(
            screenshot_grayscale,
            template_grayscale,
            center_full_x,
            center_full_y,
            search_margin,
        )

    if refined_bbox is None:
        # Last resort: global search again with a lower acceptance threshold.
        relaxed_global_ncc = _ncc_global_multiscale_best_match(
            screenshot_grayscale,
            template_grayscale,
            minimum_acceptable_score=0.18,
        )
        if relaxed_global_ncc is not None:
            refined_bbox = relaxed_global_ncc[0]

    template_against_screenshot_match = LoFTRTemplateAgainstScreenshotMatch(
        screenshot_bgr_full_size=screenshot_bgr_full_size,
        homography_template_inference_to_screenshot_inference=trusted_homography,
        template_width_pixels_at_inference=template_width_at_inference,
        template_height_pixels_at_inference=template_height_at_inference,
        divisor_inference_screenshot_x_to_fullsize_x=divisor_screen_x,
        divisor_inference_screenshot_y_to_fullsize_y=divisor_screen_y,
        high_confidence_match_points_on_screenshot_inference=screenshot_points_for_ransac,
        template_original_width_pixels=template_orig_w,
        template_original_height_pixels=template_orig_h,
        refined_template_bbox_xywh_full_size=refined_bbox,
        ransac_inlier_points_screenshot_inference=inlier_screen.astype(np.float32),
    )
    return _template_match_bounding_box_and_center_in_screenshot_pixels_from_loftr_template_against_screenshot_match_object(
        template_against_screenshot_match,
    )