yichael
/
image-match


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
							"""
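Draw the template region on a screenshot: supports LightGlue-style dictionaries, generic match-result dictionaries, or match objects that follow the LoFTR naming style.

LightGlue style: when valid four-corner coordinates are available, draw the perspective quadrilateral; otherwise enclose the matched points on the full-size image in an axis-aligned box.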
"""

from __future__ import annotations

import argparse
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any

import cv2
import numpy as np


@dataclass(frozen=True)
class LoFTRTemplateAgainstScreenshotMatch:
    """Match result object (formerly the ``loftr_template_match`` type).

    Consumed by ``draw_and_save_screenshot_with_template_match_outline`` to
    draw the template outline. Coordinates come in two resolutions: the
    "inference" resolution and the "full size" resolution of the screenshot;
    the two divisor fields convert from the former to the latter.

    NOTE(review): ``frozen=True`` together with the default ``eq=True`` makes
    ``dataclass`` generate ``__eq__``/``__hash__`` from the fields. The
    ``np.ndarray`` fields are unhashable and do not reduce to a single bool
    when compared, so hashing an instance (or comparing two instances that
    hold different arrays) is expected to raise -- confirm nobody relies on it.
    """

    # Full-resolution screenshot; the drawing function copies it and draws on
    # the copy (BGR channel order per the field name).
    screenshot_bgr_full_size: np.ndarray
    # Homography applied to the template corners (inference resolution) to get
    # screenshot coordinates at inference resolution; ``None`` when absent.
    # The drawing function only uses it when it has 9 finite elements.
    homography_template_inference_to_screenshot_inference: np.ndarray | None
    # Template width/height at inference resolution; the projected corners are
    # (0, 0) .. (width - 1, height - 1).
    template_width_pixels_at_inference: int
    template_height_pixels_at_inference: int
    # Inference-resolution screenshot x / y coordinates are DIVIDED by these
    # values to obtain full-size coordinates.
    divisor_inference_screenshot_x_to_fullsize_x: float
    divisor_inference_screenshot_y_to_fullsize_y: float
    # Points at inference resolution (reshaped to (N, 2) by the consumer); used
    # for the bounding-box fallback when fewer than 3 inlier points exist.
    high_confidence_match_points_on_screenshot_inference: np.ndarray
    # Original template size; forwarded to the quad plausibility check, which
    # currently ignores both values.
    template_original_width_pixels: int
    template_original_height_pixels: int
    # Optional (x, y, width, height) box in full-size pixels; drawn as a
    # rectangle when no plausible homography quad is available.
    refined_template_bbox_xywh_full_size: tuple[float, float, float, float] | None
    # Points at inference resolution (reshaped to (N, 2) by the consumer);
    # first choice for the bounding-box fallback. Presumably the RANSAC
    # inliers of the homography fit, per the field name -- TODO confirm.
    ransac_inlier_points_screenshot_inference: np.ndarray


def _homography_quad_plausible_on_full_image(
    corners_full: np.ndarray,
    full_w: int,
    full_h: int,
    tw0: int,
    th0: int,
) -> bool:
    """Return whether a projected quadrilateral looks plausible on the image.

    The quad is accepted only when all of the following hold:

    * it holds exactly four finite ``(x, y)`` points;
    * every corner lies inside the image extended by a 4-pixel margin;
    * its shoelace area is at least ``max(16, 1e-4 * image_area)`` and at most
      three times the image area.

    Args:
        corners_full: Four ``(x, y)`` corners in full-size image pixels; any
            array-like with exactly 8 numeric values (e.g. shape ``(4, 2)``).
        full_w: Width of the full-size image in pixels.
        full_h: Height of the full-size image in pixels.
        tw0: Original template width. Accepted for interface compatibility;
            currently unused.
        th0: Original template height. Accepted for interface compatibility;
            currently unused.

    Returns:
        ``True`` when the quad passes every check, ``False`` otherwise
        (including input that does not contain exactly four points).
    """
    _ = tw0, th0  # kept in the signature for callers; not part of the checks

    corners = np.asarray(corners_full, dtype=np.float64)
    # Validate the element count BEFORE reshaping: ``reshape(4, 2)`` raises on
    # any other size, which would turn "implausible" into an exception.
    if corners.size != 8:
        return False
    corners = corners.reshape(4, 2)
    if not np.all(np.isfinite(corners)):
        return False

    xs, ys = corners[:, 0], corners[:, 1]
    margin = 4.0
    if float(xs.min()) < -margin or float(ys.min()) < -margin:
        return False
    if float(xs.max()) > full_w + margin or float(ys.max()) > full_h + margin:
        return False

    # Shoelace formula; abs() makes the result independent of winding order.
    area = 0.5 * abs(
        float(np.dot(xs, np.roll(ys, 1)) - np.dot(ys, np.roll(xs, 1)))
    )
    img_area = float(max(1, full_w * full_h))
    # Reject degenerate (near-zero area) quads ...
    if area < max(16.0, 1e-4 * img_area):
        return False
    # ... and quads far larger than the image itself.
    return area <= img_area * 3.0


def _template_quad_on_full_image(
    match_result: LoFTRTemplateAgainstScreenshotMatch,
    full_w: int,
    full_h: int,
) -> np.ndarray | None:
    """Project the template corners to full-size pixels, or return ``None``.

    ``None`` is returned when the homography is missing, does not hold nine
    finite values, the inference-size template has a non-positive dimension,
    or the projected quad fails the plausibility check.
    """
    H = match_result.homography_template_inference_to_screenshot_inference
    if H is None or np.asarray(H).size != 9:
        return None
    # Any 9-element layout passes the size test above, but OpenCV needs a real
    # 3x3 matrix, so normalise the shape here.
    homography = np.asarray(H, dtype=np.float64).reshape(3, 3)
    if not np.all(np.isfinite(homography)):
        return None

    tw = int(match_result.template_width_pixels_at_inference)
    th = int(match_result.template_height_pixels_at_inference)
    if tw <= 0 or th <= 0:
        return None

    # Corner pixel centres of the template at inference resolution.
    template_corners = np.array(
        [
            [0.0, 0.0],
            [float(tw - 1), 0.0],
            [float(tw - 1), float(th - 1)],
            [0.0, float(th - 1)],
        ],
        dtype=np.float32,
    ).reshape(1, 4, 2)
    projected = cv2.perspectiveTransform(template_corners, homography)[0]

    # Inference-resolution coordinates are divided by the divisors to obtain
    # full-size coordinates.
    div_x = match_result.divisor_inference_screenshot_x_to_fullsize_x
    div_y = match_result.divisor_inference_screenshot_y_to_fullsize_y
    corners_full = np.stack(
        [projected[:, 0] / div_x, projected[:, 1] / div_y], axis=1
    )

    tw0 = int(match_result.template_original_width_pixels)
    th0 = int(match_result.template_original_height_pixels)
    if not _homography_quad_plausible_on_full_image(
        corners_full, full_w, full_h, tw0, th0
    ):
        return None
    return corners_full


def draw_and_save_screenshot_with_template_match_outline(
    match_result: LoFTRTemplateAgainstScreenshotMatch,
    output_image_path: Path,
    outline_color_bgr: tuple[int, int, int] = (0, 255, 0),
    outline_thickness_pixels: int = 6,
) -> None:
    """Draw the matched template region on the full-size screenshot and save it.

    The outline is produced by the first strategy that applies:

    1. The template corners projected through the homography, when that yields
       a plausible quadrilateral on the full-size image.
    2. ``refined_template_bbox_xywh_full_size`` drawn as a rectangle.
    3. The axis-aligned bounding box of the inlier points; when fewer than 3
       inliers exist the high-confidence match points are used instead (unless
       that set is empty, in which case the available inliers are kept).

    The screenshot stored in ``match_result`` is not modified; drawing happens
    on a copy.

    Args:
        match_result: Match data (screenshot, homography, scale divisors,
            points and optional refined box).
        output_image_path: Destination image path; missing parent directories
            are created.
        outline_color_bgr: Outline colour in BGR order.
        outline_thickness_pixels: Outline thickness in pixels.

    Raises:
        ValueError: If strategy 3 is reached and no match point is available.
        OSError: If ``cv2.imwrite`` reports that the image was not written.
    """
    output_image_path = Path(output_image_path)
    output_image_path.parent.mkdir(parents=True, exist_ok=True)

    annotated_screenshot_bgr = match_result.screenshot_bgr_full_size.copy()
    full_h, full_w = annotated_screenshot_bgr.shape[:2]
    thickness = int(outline_thickness_pixels)

    corners_full = _template_quad_on_full_image(match_result, full_w, full_h)
    refined_bbox = match_result.refined_template_bbox_xywh_full_size

    if corners_full is not None:
        # Round to the nearest pixel (plain astype would truncate toward zero),
        # matching the rounding used for the refined bbox below.
        pts = np.rint(corners_full).astype(np.int32).reshape(-1, 1, 2)
        cv2.polylines(
            annotated_screenshot_bgr,
            [pts],
            isClosed=True,
            color=outline_color_bgr,
            thickness=thickness,
            lineType=cv2.LINE_AA,
        )
    elif refined_bbox is not None:
        rx, ry, rw, rh = refined_bbox
        cv2.rectangle(
            annotated_screenshot_bgr,
            (int(round(rx)), int(round(ry))),
            (int(round(rx + rw)), int(round(ry + rh))),
            outline_color_bgr,
            thickness,
            lineType=cv2.LINE_AA,
        )
    else:
        points_inference = np.asarray(
            match_result.ransac_inlier_points_screenshot_inference,
            dtype=np.float64,
        ).reshape(-1, 2)
        if points_inference.shape[0] < 3:
            fallback_points = np.asarray(
                match_result.high_confidence_match_points_on_screenshot_inference,
                dtype=np.float64,
            ).reshape(-1, 2)
            # Only switch when the fallback actually has points; otherwise the
            # one or two inliers are still better than nothing.
            if fallback_points.shape[0] > 0:
                points_inference = fallback_points
        if points_inference.shape[0] == 0:
            # min()/max() of an empty array would fail with an opaque NumPy
            # reduction error; report the real cause instead.
            raise ValueError(
                "没有可用于画框的匹配点（RANSAC 内点与高置信度匹配点均为空）"
            )

        div_x = match_result.divisor_inference_screenshot_x_to_fullsize_x
        div_y = match_result.divisor_inference_screenshot_y_to_fullsize_y
        xs_full = points_inference[:, 0] / div_x
        ys_full = points_inference[:, 1] / div_y
        # floor/ceil so the box always encloses every point.
        top_left = (int(np.floor(xs_full.min())), int(np.floor(ys_full.min())))
        bottom_right = (int(np.ceil(xs_full.max())), int(np.ceil(ys_full.max())))
        cv2.rectangle(
            annotated_screenshot_bgr,
            top_left,
            bottom_right,
            outline_color_bgr,
            thickness,
            lineType=cv2.LINE_AA,
        )

    if not cv2.imwrite(str(output_image_path), annotated_screenshot_bgr):
        raise OSError(f"无法写入：{output_image_path}")


def draw_and_save_screenshot_with_match_dict_outline(
    match_dict: dict[str, Any],
    output_image_path: str | Path,
    outline_color_bgr: tuple[int, int, int] = (0, 255, 0),
    outline_thickness_pixels: int = 6,
) -> None:
    """Draw a quad or box on the screenshot described by a match dictionary.

    The screenshot is loaded from ``match_dict["screenshot_image_path"]`` and
    the outline is produced by the first strategy that applies:

    1. ``template_corners_on_screenshot_xy``: four finite ``(x, y)`` corners,
       drawn as a closed quadrilateral. Corners that are missing, non-finite
       or not exactly four points are skipped (they do not raise).
    2. ``refined_template_bbox_xywh_full_size``: at least four values
       ``(x, y, width, height)``, drawn as a rectangle.
    3. ``matched_keypoints_original_xy``: the axis-aligned bounding box of the
       points, clamped to the image bounds.

    Args:
        match_dict: Output dictionary of the matching pipeline.
        output_image_path: Destination image path; missing parent directories
            are created.
        outline_color_bgr: Outline colour in BGR order.
        outline_thickness_pixels: Outline thickness in pixels.

    Raises:
        KeyError: If ``screenshot_image_path`` is missing from ``match_dict``.
        FileNotFoundError: If the screenshot cannot be read.
        ValueError: If none of the three strategies has usable data.
        OSError: If ``cv2.imwrite`` reports that the image was not written.
    """
    output_image_path = Path(output_image_path)
    output_image_path.parent.mkdir(parents=True, exist_ok=True)
    scr_path = Path(str(match_dict["screenshot_image_path"]))
    bgr = cv2.imread(str(scr_path), cv2.IMREAD_COLOR)
    if bgr is None:
        raise FileNotFoundError(f"无法读取截图：{scr_path}")
    full_h, full_w = bgr.shape[:2]
    thickness = int(outline_thickness_pixels)

    quad: np.ndarray | None = None
    corners = match_dict.get("template_corners_on_screenshot_xy")
    if corners is not None:
        candidate = np.asarray(corners, dtype=np.float64)
        # Check the element count before reshaping; reshape(4, 2) raises on
        # any other size, which would defeat the fallbacks below.
        if candidate.size == 8 and np.all(np.isfinite(candidate)):
            quad = candidate.reshape(4, 2)

    rb = match_dict.get("refined_template_bbox_xywh_full_size")

    if quad is not None:
        # Round to the nearest pixel (plain astype would truncate toward zero),
        # matching the rounding used for the refined bbox below.
        pts = np.rint(quad).astype(np.int32).reshape(-1, 1, 2)
        cv2.polylines(
            bgr,
            [pts],
            isClosed=True,
            color=outline_color_bgr,
            thickness=thickness,
            lineType=cv2.LINE_AA,
        )
    elif rb is not None and len(rb) >= 4:
        rx, ry, rw, rh = float(rb[0]), float(rb[1]), float(rb[2]), float(rb[3])
        cv2.rectangle(
            bgr,
            (int(round(rx)), int(round(ry))),
            (int(round(rx + rw)), int(round(ry + rh))),
            outline_color_bgr,
            thickness,
            lineType=cv2.LINE_AA,
        )
    else:
        mk = match_dict.get("matched_keypoints_original_xy")
        if mk is None:
            raise ValueError("字典中缺少可用的 template_corners、bbox 或 matched_keypoints")
        xy = np.asarray(mk, dtype=np.float64).reshape(-1, 2)
        if xy.shape[0] < 1:
            raise ValueError("matched_keypoints_original_xy 为空")
        # floor/ceil so the box encloses every point, then clamp to the image.
        xmin = max(0, int(np.floor(np.min(xy[:, 0]))))
        ymin = max(0, int(np.floor(np.min(xy[:, 1]))))
        xmax = min(full_w - 1, int(np.ceil(np.max(xy[:, 0]))))
        ymax = min(full_h - 1, int(np.ceil(np.max(xy[:, 1]))))
        cv2.rectangle(
            bgr,
            (xmin, ymin),
            (xmax, ymax),
            outline_color_bgr,
            thickness,
            lineType=cv2.LINE_AA,
        )

    # Single write shared by all three strategies.
    if not cv2.imwrite(str(output_image_path), bgr):
        raise OSError(f"无法写入：{output_image_path}")


def main() -> None:
    """Command-line entry point: outline the match described by a JSON file.

    Reads the match dictionary from the positional JSON path, draws the
    outline via ``draw_and_save_screenshot_with_match_dict_outline`` and
    prints the resolved output path. When no JSON path is given, the help
    text is printed and the process exits with status 2.
    """
    cli = argparse.ArgumentParser(
        description="根据匹配 JSON 字典在截图上画模板区域（测试用）。"
    )
    cli.add_argument(
        "json_path",
        type=Path,
        nargs="?",
        help="含 screenshot_image_path、template_corners_on_screenshot_xy 等的 JSON",
    )
    cli.add_argument(
        "-o",
        "--out",
        type=Path,
        default=Path("output") / "match_outline.png",
        help="输出图片路径",
    )
    options = cli.parse_args()

    json_file = options.json_path
    if json_file is None:
        # The positional is optional only so that a bare invocation shows help.
        cli.print_help()
        raise SystemExit(2)

    import json

    raw_text = Path(json_file).read_text(encoding="utf-8")
    match_dict = json.loads(raw_text)
    out_path = options.out
    draw_and_save_screenshot_with_match_dict_outline(match_dict, out_path)
    print(f"已写入：{out_path.resolve()}", flush=True)


if __name__ == "__main__":
    # On Windows, switch stdout/stderr to UTF-8 so the non-ASCII messages this
    # script prints can be encoded by the console streams.
    streams_to_reconfigure = (
        (sys.stdout, sys.stderr) if sys.platform == "win32" else ()
    )
    for stream in streams_to_reconfigure:
        try:
            stream.reconfigure(encoding="utf-8")
        except Exception:
            # Best effort only: a stream that cannot be reconfigured must not
            # stop the script from running.
            pass
    main()