#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Template matching: locate a template image inside a screenshot.

Usage 1:
    python image-match.py <screenshot> <template> [threshold] [method=feature|template]

Usage 2:
    python image-match.py --adb <adb_path> --device <serial> --screenshot <out.png> \
        --template <tpl.png> [--threshold 0.8] [--method template|feature]

Usage 2 runs the adb screenshot inside Python, avoiding compatibility issues
caused by Node mangling the binary PNG stream.

--method feature : keypoint matching (LightGlue preferred; falls back to
                   ORB + multi-scale template matching). Works across
                   different resolutions.
--method template: pixel template matching (TM_CCOEFF_NORMED). Only suitable
                   when screenshot and template share the same resolution.

Output: JSON on stdout.
"""
import sys
import os
import json
import subprocess

try:
    import cv2
    import numpy as np
except ImportError as e:
    print(json.dumps({"success": False, "error": f"OpenCV 导入失败: {e}。请安装: pip install opencv-python numpy"}))
    sys.exit(1)

try:
    from PIL import Image as PILImage
    HAS_PIL = True
except ImportError:
    HAS_PIL = False

# LightGlue: if installed (python/LightGlue, pip install -e .), prefer it for
# the "feature" matching method.
HAS_LIGHTGLUE = False
try:
    _lg_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'LightGlue'))
    if _lg_root not in sys.path:
        sys.path.insert(0, _lg_root)
    from lightglue import LightGlue, SuperPoint
    from lightglue.utils import match_pair
    import torch
    HAS_LIGHTGLUE = True
except Exception:
    # Best-effort: any failure (missing package, bad install) simply disables
    # the LightGlue path; ORB / multi-scale fallbacks still work.
    pass


def run_adb_screencap(adb_path, device, output_path):
    """Take a screenshot via ``adb exec-out screencap -p`` and save it.

    Handles the binary PNG stream entirely in Python.

    Args:
        adb_path: Path to the adb executable (forward slashes allowed).
        device: Device serial passed to ``adb -s``.
        output_path: Destination file for the PNG data.

    Returns:
        (True, output_path) on success, (False, error_message) on failure.
    """
    # On Windows the subprocess needs a native executable path; normalizing
    # separators keeps forward-slash paths usable too.
    args = [adb_path.replace('/', os.sep), '-s', device, 'exec-out', 'screencap', '-p']
    try:
        result = subprocess.run(args, capture_output=True, timeout=15)
        if result.returncode != 0:
            return False, (result.stderr or result.stdout or b'').decode('utf-8', errors='replace')
        data = result.stdout
        if not data or len(data) < 100:
            return False, "截图数据为空"
        # NOTE: never apply \r\n replacement to PNG data — it corrupts the
        # IDAT compressed chunks and makes the image unparseable.
        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(output_path, 'wb') as f:
            f.write(data)
        return True, output_path
    except subprocess.TimeoutExpired:
        return False, "截图超时"
    except Exception as e:
        return False, str(e)


def load_image(path):
    """Load an image from disk as a BGR numpy array.

    Tries, in order: ``cv2.imdecode`` on the raw bytes (tolerates PNGs that
    ``cv2.imread`` rejects, e.g. some Android screenshots), ``cv2.imread``,
    and finally PIL if available.

    Returns:
        BGR image array, or None if the file is missing/unreadable.
    """
    if not os.path.exists(path):
        return None
    with open(path, 'rb') as f:
        data = np.frombuffer(f.read(), dtype=np.uint8)
    img = cv2.imdecode(data, cv2.IMREAD_COLOR)
    if img is not None:
        return img
    img = cv2.imread(path)
    if img is not None:
        return img
    if HAS_PIL:
        try:
            pil_img = PILImage.open(path).convert('RGB')
            img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
            return img
        except Exception:
            pass
    return None


def _numpy_bgr_to_torch_rgb(img_bgr):
    """(H,W,3) BGR numpy uint8 -> (3,H,W) float [0,1] RGB tensor for LightGlue."""
    rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    t = np.ascontiguousarray(rgb.transpose(2, 0, 1))
    return torch.from_numpy(t).float().div(255.0)


def _quad_to_box(corners_screen):
    """Reduce a projected template quadrilateral to an axis-aligned box.

    Args:
        corners_screen: (4,1,2) float array from ``cv2.perspectiveTransform``.

    Returns:
        (x, y, w, h, center_x, center_y) — bounding box of the quad plus its
        centroid, all rounded to ints.
    """
    x_coords = corners_screen[:, 0, 0]
    y_coords = corners_screen[:, 0, 1]
    x = int(round(np.min(x_coords)))
    y = int(round(np.min(y_coords)))
    w = int(round(np.max(x_coords) - np.min(x_coords)))
    h = int(round(np.max(y_coords) - np.min(y_coords)))
    center_x = int(round(np.mean(x_coords)))
    center_y = int(round(np.mean(y_coords)))
    return (x, y, w, h, center_x, center_y)


def match_by_lightglue(screenshot, template, min_matches=8, device='cpu'):
    """Locate the template in the screenshot using LightGlue + SuperPoint.

    Args:
        screenshot: BGR screenshot image.
        template: BGR template image.
        min_matches: Minimum number of keypoint matches required.
        device: Torch device string for the models.

    Returns:
        (x, y, w, h, center_x, center_y) or None on failure/not found.
    """
    if not HAS_LIGHTGLUE:
        return None
    t_h, t_w = template.shape[:2]
    try:
        img0 = _numpy_bgr_to_torch_rgb(screenshot)
        img1 = _numpy_bgr_to_torch_rgb(template)
        extractor = SuperPoint(max_num_keypoints=2048).eval().to(device)
        matcher = LightGlue(features='superpoint').eval().to(device)
        feats0, feats1, matches01 = match_pair(extractor, matcher, img0, img1, device=device)
        matches = matches01.get('matches')
        if matches is None or matches.shape[0] < min_matches:
            return None
        kp0 = feats0['keypoints']
        kp1 = feats1['keypoints']
        idx0 = matches[:, 0]
        idx1 = matches[:, 1]
        pts_screen = kp0[idx0].cpu().numpy().astype(np.float32)
        pts_template = kp1[idx1].cpu().numpy().astype(np.float32)
        H, _ = cv2.findHomography(pts_template, pts_screen, cv2.RANSAC, 5.0)
        if H is None:
            return None
        # Project the template's four corners into screenshot coordinates and
        # use their centroid as the match center.
        corners = np.float32([[0, 0], [t_w, 0], [t_w, t_h], [0, t_h]]).reshape(-1, 1, 2)
        corners_screen = cv2.perspectiveTransform(corners, H)
        return _quad_to_box(corners_screen)
    except Exception:
        # Best-effort matcher: any runtime failure falls through to ORB.
        return None


def match_by_features(screenshot, template, min_good_matches=8):
    """ORB keypoint matching fallback: locate the template in the screenshot.

    Args:
        screenshot: BGR screenshot image.
        template: BGR template image.
        min_good_matches: Minimum ratio-test survivors required.

    Returns:
        (x, y, w, h, center_x, center_y) or None if not found.
    """
    gray_screen = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
    gray_tpl = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    t_h, t_w = template.shape[:2]
    orb = cv2.ORB_create(nfeatures=2000)
    kp1, desc1 = orb.detectAndCompute(gray_tpl, None)
    kp2, desc2 = orb.detectAndCompute(gray_screen, None)
    if desc1 is None or desc2 is None or len(kp1) < 4 or len(kp2) < 4:
        return None
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
    matches = bf.knnMatch(desc1, desc2, k=2)
    # Lowe's ratio test to keep only distinctive matches.
    good = []
    for m_n in matches:
        if len(m_n) != 2:
            continue
        m, n = m_n
        if m.distance < 0.75 * n.distance:
            good.append(m)
    if len(good) < min_good_matches:
        return None
    src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    H, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    if H is None:
        return None
    # Template corners projected into the screenshot; centroid is the center.
    corners = np.float32([[0, 0], [t_w, 0], [t_w, t_h], [0, t_h]]).reshape(-1, 1, 2)
    corners_screen = cv2.perspectiveTransform(corners, H)
    return _quad_to_box(corners_screen)


def multi_scale_template_match(screenshot, template, threshold=0.65):
    """Multi-scale template matching for simple icons/outlines.

    Resizes the template over a range of scales and matches each against the
    screenshot, which adapts to resolution differences.

    Args:
        screenshot: BGR screenshot image.
        template: BGR template image.
        threshold: Minimum TM_CCOEFF_NORMED score to accept.

    Returns:
        (x, y, w, h, center_x, center_y) of the best match above the
        threshold, or None.
    """
    gray_screen = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
    gray_tpl = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    sh, sw = screenshot.shape[:2]
    t_h, t_w = template.shape[:2]
    best = None
    best_val = threshold
    # Scales from 0.4 to 1.6 in steps of 0.12; skip scales where the resized
    # template would exceed the screenshot.
    for scale in np.arange(0.4, 1.65, 0.12):
        w = max(8, int(round(t_w * scale)))
        h = max(8, int(round(t_h * scale)))
        if h > sh or w > sw:
            continue
        resized = cv2.resize(gray_tpl, (w, h), interpolation=cv2.INTER_AREA)
        result = cv2.matchTemplate(gray_screen, resized, cv2.TM_CCOEFF_NORMED)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
        if max_val > best_val:
            best_val = max_val
            x, y = int(max_loc[0]), int(max_loc[1])
            center_x = x + w // 2
            center_y = y + h // 2
            best = (x, y, w, h, center_x, center_y)
    return best


def _fail(message):
    """Print a failure JSON object to stdout and exit with status 1."""
    print(json.dumps({"success": False, "error": message}))
    sys.exit(1)


def _print_success(box):
    """Print a success JSON object for a match box and exit with status 0."""
    x, y, w, h, center_x, center_y = box
    output = {
        "success": True,
        "x": x,
        "y": y,
        "width": w,
        "height": h,
        "center_x": center_x,
        "center_y": center_y
    }
    print(json.dumps(output))
    sys.exit(0)


def main():
    """CLI entry point: parse args, take/load images, run the selected matcher."""
    screenshot_path = None
    template_path = None
    threshold = 0.8
    method = 'feature'  # feature = keypoint matching (cross-resolution); template = pixel matching
    adb_path = None
    device = None

    if len(sys.argv) >= 2 and sys.argv[1] == '--adb':
        # Usage 2: --adb <path> --device <serial> --screenshot <out> --template <tpl>
        i = 1
        while i < len(sys.argv):
            if sys.argv[i] == '--adb' and i + 1 < len(sys.argv):
                adb_path = sys.argv[i + 1]
                i += 2
            elif sys.argv[i] == '--device' and i + 1 < len(sys.argv):
                device = sys.argv[i + 1]
                i += 2
            elif sys.argv[i] == '--screenshot' and i + 1 < len(sys.argv):
                screenshot_path = sys.argv[i + 1]
                i += 2
            elif sys.argv[i] == '--template' and i + 1 < len(sys.argv):
                template_path = sys.argv[i + 1]
                i += 2
            elif sys.argv[i] == '--threshold' and i + 1 < len(sys.argv):
                threshold = float(sys.argv[i + 1])
                i += 2
            elif sys.argv[i] == '--method' and i + 1 < len(sys.argv):
                method = (sys.argv[i + 1] or 'feature').strip().lower()
                if method not in ('template', 'feature'):
                    method = 'feature'
                i += 2
            else:
                i += 1
        if adb_path and device and screenshot_path and template_path:
            ok, msg = run_adb_screencap(adb_path, device, screenshot_path)
            if not ok:
                _fail(f"截图失败: {msg}")
        else:
            _fail("缺少 --adb/--device/--screenshot/--template 参数")
    else:
        # Usage 1: positional arguments.
        if len(sys.argv) < 3:
            _fail("用法: image-match.py <screenshot> <template> [threshold] [method=feature|template]")
        screenshot_path = sys.argv[1]
        template_path = sys.argv[2]
        threshold = float(sys.argv[3]) if len(sys.argv) > 3 else 0.8
        if len(sys.argv) > 4 and sys.argv[4].lower() in ('template', 'feature'):
            method = sys.argv[4].lower()

    if not os.path.exists(screenshot_path):
        _fail(f"截图文件不存在: {screenshot_path}")
    if not os.path.exists(template_path):
        _fail(f"模板文件不存在: {template_path}")

    screenshot = load_image(screenshot_path)
    template = load_image(template_path)
    if screenshot is None:
        _fail("无法读取截图(文件损坏或格式不支持)")
    if template is None:
        _fail(f"无法读取模板: {template_path}")

    t_h, t_w = template.shape[:2]
    # Pixel matching requires the template to fit inside the screenshot.
    if method == 'template' and (t_h > screenshot.shape[0] or t_w > screenshot.shape[1]):
        _fail("模板尺寸大于截图")

    if method == 'feature':
        # 1) LightGlue + SuperPoint keypoint matching (if installed).
        if HAS_LIGHTGLUE:
            lg_result = match_by_lightglue(screenshot, template, device='cpu')
            if lg_result is not None:
                _print_success(lg_result)
        # 2) Fallback: ORB keypoint matching.
        feat_result = match_by_features(screenshot, template)
        if feat_result is not None:
            _print_success(feat_result)
        # 3) Fallback: multi-scale template matching — suits simple
        #    icons/outlines (hearts, flat-color icons) across resolutions.
        fallback_threshold = min(threshold, 0.65)
        scale_result = multi_scale_template_match(screenshot, template, threshold=fallback_threshold)
        if scale_result is not None:
            _print_success(scale_result)
        _fail("LightGlue/特征点与多尺度模板均未匹配(可检查模板是否在画面中或使用 --method template)")

    # Pixel template matching via TM_CCOEFF_NORMED (same resolution only).
    result = cv2.matchTemplate(screenshot, template, cv2.TM_CCOEFF_NORMED)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
    if max_val < threshold:
        _fail(f"未找到匹配 (相似度 {max_val:.3f} < {threshold})")
    x, y = int(max_loc[0]), int(max_loc[1])
    _print_success((x, y, t_w, t_h, x + t_w // 2, y + t_h // 2))


if __name__ == "__main__":
    main()