#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Template matching: find where a template image appears inside a screenshot.

Usage 1 (positional):
    python image-match.py SCREENSHOT TEMPLATE [threshold] [method]
Usage 2 (flags, selected when the first argument is --adb):
    python image-match.py --adb ADB_PATH --device SERIAL --screenshot PATH
        --template PATH [--threshold 0.8] [--method template|feature]
        [--scale-min F] [--scale-max F] [--center-ratio F]
        [--crop-square PERCENT w|h] [--template-output PATH]
        [--crop-output PATH]

Usage 2 takes the adb screenshot from inside Python, which avoids the
compatibility problems of routing binary data through Node.

--method feature:  feature matching (RoMa first, then ORB, then multi-scale
                   template matching); reusable across resolutions.
--method template: pixel template matching (TM_CCOEFF_NORMED); only suitable
                   when screenshot and template share a resolution.

Output: one JSON object on stdout.
"""
import sys
import os
import json
import subprocess
import tempfile

# The OpenCV import failure is remembered here and reported by main(), so
# that importing this module never terminates the interpreter.
_CV_IMPORT_ERROR = None
try:
    import cv2
    import numpy as np
except ImportError as e:
    cv2 = None
    np = None
    _CV_IMPORT_ERROR = e

try:
    from PIL import Image as PILImage
    HAS_PIL = True
except ImportError:
    HAS_PIL = False

# RoMa: when available (python/RoMa, `pip install -e .`) it is preferred for
# feature matching.
HAS_ROMA = False
try:
    _roma_root = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..', '..', 'python', 'RoMa'))
    if os.path.isdir(_roma_root) and _roma_root not in sys.path:
        sys.path.insert(0, _roma_root)
    from romatch import roma_outdoor
    import torch as _torch_roma
    HAS_ROMA = True
except Exception:
    pass

# Environment variables that tune RoMa, with their defaults.
_ROMA_ENV_NAMES = ('ROMA_COARSE_RES', 'ROMA_UPSAMPLE_RES', 'ROMA_MIN_MATCHES')
# (coarse_res, upsample_res, min_matches) presets tried when the first RoMa
# attempt finds nothing.
_ROMA_FALLBACK_PRESETS = ((672, 1120, 4), (448, 864, 2))
# Thresholds tried in order by the multi-scale fallback, loosest last.
_RELAXING_THRESHOLDS = (0.52, 0.48, 0.44, 0.40)
# Lower bound on the scale for gallery thumbnails; very small scales tended
# to mis-match unrelated regions of the screen.
_GALLERY_SCALE_MIN = 0.18


def _fail(message):
    """Print a failure JSON object and exit with status 1."""
    print(json.dumps({"success": False, "error": message}))
    sys.exit(1)


def _emit_match(screenshot, box, crop_output_path):
    """Save the optional crop, print the success JSON and exit with status 0."""
    x, y, w, h, center_x, center_y = box
    save_match_crop(screenshot, x, y, w, h, crop_output_path)
    output = {
        "success": True,
        "x": x,
        "y": y,
        "width": w,
        "height": h,
        "center_x": center_x,
        "center_y": center_y
    }
    print(json.dumps(output))
    sys.exit(0)


def save_match_crop(screenshot, x, y, w, h, crop_output_path):
    """Write the matched region (x, y, w, h) of the screenshot to a file.

    Does nothing when crop_output_path is empty or the size is not positive.
    The rectangle is clamped to the screenshot and always keeps at least one
    pixel. This is a best-effort debugging aid: any error is ignored so that
    it can never turn a successful match into a failure.
    """
    if not crop_output_path or w <= 0 or h <= 0:
        return
    try:
        sh, sw = screenshot.shape[:2]
        x1 = max(0, min(x, sw - 1))
        y1 = max(0, min(y, sh - 1))
        x2 = max(x1 + 1, min(x + w, sw))
        y2 = max(y1 + 1, min(y + h, sh))
        crop = screenshot[y1:y2, x1:x2]
        if crop.size > 0:
            out_dir = os.path.dirname(crop_output_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            cv2.imwrite(crop_output_path, crop)
    except Exception:
        pass


def run_adb_screencap(adb_path, device, output_path):
    """Run `adb exec-out screencap -p` and store the PNG at output_path.

    Returns (True, output_path) on success, otherwise (False, message).
    The PNG bytes are written exactly as received.
    """
    # On Windows the child process needs a native executable path.
    args = [adb_path.replace('/', os.sep), '-s', device,
            'exec-out', 'screencap', '-p']
    try:
        result = subprocess.run(args, capture_output=True, timeout=15)
        if result.returncode != 0:
            detail = result.stderr or result.stdout or b''
            return False, detail.decode('utf-8', errors='replace')
        data = result.stdout
        if not data or len(data) < 100:
            return False, "截图数据为空"
        # Do not replace \r\n inside the PNG data: that corrupts the IDAT
        # chunks and makes the file undecodable.
        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(output_path, 'wb') as f:
            f.write(data)
        return True, output_path
    except subprocess.TimeoutExpired:
        return False, "截图超时"
    except Exception as e:
        return False, str(e)


def load_image(path):
    """Load an image as a BGR array, or return None when it cannot be read.

    Three decoders are tried in order: cv2.imdecode on the raw bytes,
    cv2.imread, and finally PIL (when installed). This copes with PNG files
    that OpenCV cannot read directly, such as some Android screenshots.
    """
    if not os.path.exists(path):
        return None
    try:
        with open(path, 'rb') as f:
            raw = f.read()
    except OSError:
        return None
    if not raw:
        # cv2.imdecode raises on an empty buffer instead of returning None.
        return None
    img = cv2.imdecode(np.frombuffer(raw, dtype=np.uint8), cv2.IMREAD_COLOR)
    if img is not None:
        return img
    img = cv2.imread(path)
    if img is not None:
        return img
    if HAS_PIL:
        try:
            pil_img = PILImage.open(path).convert('RGB')
            return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        except Exception:
            pass
    return None


def _env_number(name, default, cast):
    """Read a numeric environment variable, using default when unset or invalid."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return cast(raw)
    except (TypeError, ValueError):
        return default


def _roma_params():
    """Read the RoMa tuning parameters from the environment.

    Returns (coarse_res, upsample_res, min_matches, sample_num,
    ransac_thresh). A missing or malformed variable yields its default.
    """
    coarse = _env_number("ROMA_COARSE_RES", 560, int)
    upsample = _env_number("ROMA_UPSAMPLE_RES", 1152, int)
    min_m = _env_number("ROMA_MIN_MATCHES", 3, int)
    sample_num = _env_number("ROMA_SAMPLE_NUM", 20000, int)
    ransac = _env_number("ROMA_RANSAC_THRESH", 14.0, float)
    return coarse, upsample, min_m, sample_num, ransac


def _project_template_box(H, t_w, t_h):
    """Map the template rectangle through homography H.

    Returns (x, y, w, h, center_x, center_y) of the axis-aligned box around
    the four projected corners, with the centre taken as the mean of the
    corners, or None when the projection is not finite.
    """
    corners = np.float32(
        [[0, 0], [t_w, 0], [t_w, t_h], [0, t_h]]).reshape(-1, 1, 2)
    projected = cv2.perspectiveTransform(corners, H)
    if not np.all(np.isfinite(projected)):
        return None
    xs = projected[:, 0, 0]
    ys = projected[:, 0, 1]
    x = int(round(np.min(xs)))
    y = int(round(np.min(ys)))
    w = int(round(np.max(xs) - np.min(xs)))
    h = int(round(np.max(ys) - np.min(ys)))
    center_x = int(round(np.mean(xs)))
    center_y = int(round(np.mean(ys)))
    return (x, y, w, h, center_x, center_y)


def _new_temp_png():
    """Create an empty temporary .png file and return its path."""
    fd, path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    return path


def _remove_quietly(path):
    """Delete path if it is set, ignoring filesystem errors."""
    if not path:
        return
    try:
        os.unlink(path)
    except OSError:
        pass


def _write_image_for_roma(path, bgr_image):
    """Write a BGR image to path as a correctly coloured PNG."""
    if HAS_PIL:
        rgb = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        PILImage.fromarray(rgb).save(path)
    else:
        # cv2.imwrite expects BGR input, so the array is written unchanged.
        cv2.imwrite(path, bgr_image)


def match_by_roma(screenshot, template, min_matches=6, device=None):
    """Locate the template in the screenshot with RoMa dense matching.

    Returns (x, y, w, h, center_x, center_y), or None when RoMa is not
    available, too few matches are found, no homography can be estimated, or
    any error occurs.

    Tunable through ROMA_COARSE_RES, ROMA_UPSAMPLE_RES, ROMA_MIN_MATCHES,
    ROMA_SAMPLE_NUM and ROMA_RANSAC_THRESH. Note that the effective minimum
    match count always comes from ROMA_MIN_MATCHES (default 3); the
    min_matches argument is kept only for interface compatibility.
    """
    if not HAS_ROMA:
        return None
    t_h, t_w = template.shape[:2]
    sh_h, sh_w = screenshot.shape[:2]
    (coarse_res, upsample_res, env_min_matches,
     sample_num, ransac_thresh) = _roma_params()
    min_matches = env_min_matches
    try:
        if _torch_roma.get_float32_matmul_precision() != "highest":
            _torch_roma.set_float32_matmul_precision("highest")
    except Exception:
        pass

    path_a = None
    path_b = None
    try:
        if device is None:
            device = _torch_roma.device(
                "cuda" if _torch_roma.cuda.is_available() else "cpu")
        roma_model = roma_outdoor(
            device=device, coarse_res=coarse_res, upsample_res=upsample_res)
        # RoMa is given file paths, so both images go through temp files.
        path_a = _new_temp_png()
        path_b = _new_temp_png()
        _write_image_for_roma(path_a, screenshot)
        _write_image_for_roma(path_b, template)

        warp, certainty = roma_model.match(path_a, path_b, device=device)
        matches, certainty = roma_model.sample(warp, certainty, num=sample_num)
        H_out, W_out = roma_model.get_output_resolution()
        kptsA, kptsB = roma_model.to_pixel_coordinates(
            matches, H_out, W_out, H_out, W_out)
        kptsA = kptsA.cpu().numpy().astype(np.float32)
        kptsB = kptsB.cpu().numpy().astype(np.float32)
        if kptsA.shape[0] < min_matches:
            return None

        # Rescale keypoints from RoMa's output resolution to image pixels.
        kptsA_orig = kptsA * np.array(
            [sh_w / float(W_out), sh_h / float(H_out)])
        kptsB_orig = kptsB * np.array(
            [t_w / float(W_out), t_h / float(H_out)])
        # The RANSAC distance threshold is slightly relaxed to tolerate
        # scaling and perspective distortion (see ROMA_RANSAC_THRESH).
        H, _mask = cv2.findHomography(
            kptsB_orig, kptsA_orig, cv2.RANSAC, ransac_thresh)
        if H is None:
            return None
        return _project_template_box(H, t_w, t_h)
    except Exception:
        return None
    finally:
        # Each file is removed independently so one failure cannot leak
        # the other.
        _remove_quietly(path_a)
        _remove_quietly(path_b)


def match_by_features(screenshot, template, min_good_matches=6):
    """Locate the template with ORB feature matching (fallback for RoMa).

    Returns (x, y, w, h, center_x, center_y) or None.
    """
    gray_screen = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
    gray_tpl = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    t_h, t_w = template.shape[:2]
    orb = cv2.ORB_create(nfeatures=2000)
    kp1, desc1 = orb.detectAndCompute(gray_tpl, None)
    kp2, desc2 = orb.detectAndCompute(gray_screen, None)
    if desc1 is None or desc2 is None or len(kp1) < 4 or len(kp2) < 4:
        return None
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
    # Ratio test: keep a match only when it clearly beats the runner-up.
    good = []
    for pair in bf.knnMatch(desc1, desc2, k=2):
        if len(pair) != 2:
            continue
        best, second = pair
        if best.distance < 0.82 * second.distance:
            good.append(best)
    # A homography needs at least four point pairs.
    if len(good) < max(4, min_good_matches):
        return None
    src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    H, _mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    if H is None:
        return None
    return _project_template_box(H, t_w, t_h)


def multi_scale_template_match(screenshot, template, threshold=0.50,
                               scale_min=0.4, scale_max=1.65):
    """Template-match at many template scales and keep the best score.

    The template is resized over [scale_min, scale_max] (step is the range
    divided by 60, but at least 0.02) and matched with TM_CCOEFF_NORMED on
    grayscale images. Only scores strictly above threshold count.

    Returns (x, y, w, h, center_x, center_y) or None.
    """
    gray_screen = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
    gray_tpl = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    sh, sw = screenshot.shape[:2]
    t_h, t_w = template.shape[:2]
    best = None
    best_val = threshold
    step = max(0.02, (scale_max - scale_min) / 60.0)
    for scale in np.arange(scale_min, scale_max + step * 0.5, step):
        # Resized templates are never smaller than 8 pixels per side.
        w = max(8, int(round(t_w * scale)))
        h = max(8, int(round(t_h * scale)))
        if h > sh or w > sw:
            continue
        resized = cv2.resize(gray_tpl, (w, h), interpolation=cv2.INTER_AREA)
        result = cv2.matchTemplate(gray_screen, resized, cv2.TM_CCOEFF_NORMED)
        _min_val, max_val, _min_loc, max_loc = cv2.minMaxLoc(result)
        if max_val > best_val:
            best_val = max_val
            x, y = int(max_loc[0]), int(max_loc[1])
            best = (x, y, w, h, x + w // 2, y + h // 2)
    return best


def _default_options():
    """Return the option values used when nothing is given on the command line."""
    return {
        'screenshot_path': None,
        'template_path': None,
        'threshold': 0.8,
        # feature = feature matching (cross resolution),
        # template = pixel template matching
        'method': 'feature',
        'adb_path': None,
        'device': None,
        'scale_min': 0.4,
        'scale_max': 1.65,
        # Fraction (0-1] of the template centre to keep; 1 keeps everything.
        'center_ratio': 1.0,
        # Square centre crop: side = percent * template width or height.
        'crop_square_percent': None,
        'crop_square_base': None,  # 'w' or 'h'
        # Where a cropped template is written (see _crop_template).
        'template_output_path': None,
        # Where the matched screenshot region is saved for visual checking.
        'crop_output_path': None,
    }


def _parse_number(flag, text):
    """Convert text to float; raise ValueError with a readable message."""
    try:
        return float(text)
    except (TypeError, ValueError):
        raise ValueError(f"参数 {flag} 不是有效数字: {text}")


def _parse_crop_square(percent_text, base_text):
    """Parse the two --crop-square values; return (None, None) when invalid."""
    try:
        percent = float(percent_text)
    except (TypeError, ValueError):
        return None, None
    base = (base_text or '').strip().lower()
    # `not percent > 0` also rejects NaN.
    if base not in ('w', 'h') or not percent > 0:
        return None, None
    return percent, base


_STRING_FLAGS = {
    '--adb': 'adb_path',
    '--device': 'device',
    '--screenshot': 'screenshot_path',
    '--template': 'template_path',
}
_NUMBER_FLAGS = {
    '--threshold': 'threshold',
    '--scale-min': 'scale_min',
    '--scale-max': 'scale_max',
}
_OPTIONAL_PATH_FLAGS = {
    '--template-output': 'template_output_path',
    '--crop-output': 'crop_output_path',
}


def _parse_flag_args(argv):
    """Parse the flag-style command line (usage 2) into an options dict.

    Unknown tokens and flags without a value are skipped. Raises ValueError
    when a numeric flag carries a non-numeric value.
    """
    opts = _default_options()
    n = len(argv)
    i = 1
    while i < n:
        flag = argv[i]
        if flag == '--crop-square':
            if i + 2 < n:
                percent, base = _parse_crop_square(argv[i + 1], argv[i + 2])
                opts['crop_square_percent'] = percent
                opts['crop_square_base'] = base
                i += 3
            else:
                i += 1
            continue
        if i + 1 >= n:
            i += 1
            continue
        value = argv[i + 1]
        if flag in _STRING_FLAGS:
            opts[_STRING_FLAGS[flag]] = value
        elif flag in _NUMBER_FLAGS:
            opts[_NUMBER_FLAGS[flag]] = _parse_number(flag, value)
        elif flag == '--method':
            method = (value or 'feature').strip().lower()
            opts['method'] = method if method in ('template', 'feature') else 'feature'
        elif flag == '--center-ratio':
            ratio = _parse_number(flag, value)
            if ratio <= 0 or ratio > 1:
                ratio = 1.0
            opts['center_ratio'] = ratio
        elif flag in _OPTIONAL_PATH_FLAGS:
            opts[_OPTIONAL_PATH_FLAGS[flag]] = (value or '').strip() or None
        else:
            i += 1
            continue
        i += 2
    return opts


def _parse_positional_args(argv):
    """Parse usage 1: SCREENSHOT TEMPLATE [threshold] [method]."""
    opts = _default_options()
    opts['screenshot_path'] = argv[1]
    opts['template_path'] = argv[2]
    if len(argv) > 3:
        opts['threshold'] = _parse_number('threshold', argv[3])
    if len(argv) > 4 and argv[4].lower() in ('template', 'feature'):
        opts['method'] = argv[4].lower()
    return opts


def _crop_template(template, opts):
    """Apply the requested centre crop; return (template, did_crop)."""
    t_h, t_w = template.shape[:2]
    percent = opts['crop_square_percent']
    base = opts['crop_square_base']
    ratio = opts['center_ratio']
    if percent is not None and base in ('w', 'h'):
        # Square crop: the side is a fraction of the template width or
        # height, limited to the template itself.
        side_raw = (t_w if base == 'w' else t_h) * percent
        side = min(max(1, int(round(side_raw))), t_w, t_h)
        x0 = (t_w - side) // 2
        y0 = (t_h - side) // 2
        return template[y0:y0 + side, x0:x0 + side].copy(), True
    if ratio < 1.0:
        # Legacy option: trim the template edges, keeping the centre.
        nw = max(1, int(t_w * ratio))
        nh = max(1, int(t_h * ratio))
        x0 = (t_w - nw) // 2
        y0 = (t_h - nh) // 2
        return template[y0:y0 + nh, x0:x0 + nw].copy(), True
    return template, False


def _match_relaxing(screenshot, template, scale_min, scale_max, cap=None):
    """Run multi-scale matching while loosening the threshold step by step.

    When cap is given, no step uses a threshold above it.
    """
    for level in _RELAXING_THRESHOLDS:
        limit = level if cap is None else min(cap, level)
        found = multi_scale_template_match(
            screenshot, template, threshold=limit,
            scale_min=scale_min, scale_max=scale_max)
        if found is not None:
            return found
    return None


def _match_by_roma_with_presets(screenshot, template):
    """Try RoMa with the current settings, then with each fallback preset.

    The presets are applied through the ROMA_* environment variables, which
    are restored to their previous state afterwards.
    """
    result = match_by_roma(screenshot, template, min_matches=4)
    if result is not None:
        return result
    saved = {name: os.environ.get(name) for name in _ROMA_ENV_NAMES}
    try:
        for preset in _ROMA_FALLBACK_PRESETS:
            for name, value in zip(_ROMA_ENV_NAMES, preset):
                os.environ[name] = str(value)
            result = match_by_roma(screenshot, template, min_matches=preset[2])
            if result is not None:
                break
    finally:
        for name, value in saved.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
    return result


def _run_feature_method(screenshot, template, opts):
    """Run the cross-resolution pipeline; always exits the process."""
    template_path = opts['template_path']
    threshold = opts['threshold']
    scale_max = opts['scale_max']
    crop_output_path = opts['crop_output_path']
    t_h, t_w = template.shape[:2]
    sh, sw = screenshot.shape[:2]

    # Gallery thumbnails are recognised by "pic" in the template file name.
    is_gallery_thumb = bool(template_path) and 'pic' in os.path.basename(template_path)
    scale_min_use = opts['scale_min']
    if is_gallery_thumb:
        scale_min_use = max(scale_min_use, _GALLERY_SCALE_MIN)

    # Small gallery thumbnails: multi-scale matching goes first.
    if is_gallery_thumb and t_w < sw * 0.5 and t_h < sh * 0.5:
        found = _match_relaxing(screenshot, template, scale_min_use, scale_max)
        if found is not None:
            _emit_match(screenshot, found, crop_output_path)

    # 1) RoMa dense matching (when installed), retried with other presets.
    if HAS_ROMA:
        found = _match_by_roma_with_presets(screenshot, template)
        if found is not None:
            _emit_match(screenshot, found, crop_output_path)

    # 2) Fallback: ORB feature matching.
    found = match_by_features(screenshot, template)
    if found is not None:
        _emit_match(screenshot, found, crop_output_path)

    # 3) Fallback: multi-scale template matching, loosened down to 0.40 but
    #    never above the user threshold.
    found = _match_relaxing(
        screenshot, template, scale_min_use, scale_max, cap=threshold)
    if found is None and (t_w > 1.3 * t_h or t_h > 1.3 * t_w):
        # Elongated template: retry with its central square only.
        t_s = min(t_w, t_h)
        cx, cy = t_w // 2, t_h // 2
        y0, y1 = max(0, cy - t_s // 2), min(t_h, cy + t_s // 2)
        x0, x1 = max(0, cx - t_s // 2), min(t_w, cx + t_s // 2)
        if y1 > y0 and x1 > x0:
            found = _match_relaxing(
                screenshot, template[y0:y1, x0:x1],
                scale_min_use, scale_max, cap=threshold)
    if found is not None:
        _emit_match(screenshot, found, crop_output_path)

    _fail("RoMa/特征点与多尺度模板均未匹配(可检查模板是否在画面中或使用 --method template)")


def main():
    """Command-line entry point: parse arguments, match, print JSON, exit.

    Exits with status 0 and a success object when the template is found,
    otherwise with status 1 and an error object.
    """
    if _CV_IMPORT_ERROR is not None:
        _fail(f"OpenCV 导入失败: {_CV_IMPORT_ERROR}。请安装: pip install opencv-python numpy")

    argv = sys.argv
    flag_mode = len(argv) >= 2 and argv[1] == '--adb'
    if not flag_mode and len(argv) < 3:
        _fail("用法: image-match.py [threshold] [method=feature|template]")
    try:
        opts = _parse_flag_args(argv) if flag_mode else _parse_positional_args(argv)
    except ValueError as err:
        # A malformed number must still produce JSON, not a traceback.
        _fail(str(err))

    screenshot_path = opts['screenshot_path']
    template_path = opts['template_path']

    if flag_mode:
        if not (opts['adb_path'] and opts['device']
                and screenshot_path and template_path):
            _fail("缺少 --adb/--device/--screenshot/--template 参数")
        ok, msg = run_adb_screencap(
            opts['adb_path'], opts['device'], screenshot_path)
        if not ok:
            _fail(f"截图失败: {msg}")

    if not os.path.exists(screenshot_path):
        _fail(f"截图文件不存在: {screenshot_path}")
    if not os.path.exists(template_path):
        _fail(f"模板文件不存在: {template_path}")

    screenshot = load_image(screenshot_path)
    template = load_image(template_path)
    if screenshot is None:
        _fail("无法读取截图(文件损坏或格式不支持)")
    if template is None:
        _fail(f"无法读取模板: {template_path}")

    template, did_crop = _crop_template(template, opts)
    if did_crop:
        # NOTE(review): without --template-output the cropped image replaces
        # the original template file, so repeated runs crop it again. Kept
        # as-is because callers may rely on it; confirm this is intended.
        out_path = opts['template_output_path'] or template_path
        try:
            cv2.imwrite(out_path, template)
        except Exception:
            pass
    t_h, t_w = template.shape[:2]

    if opts['method'] == 'feature':
        _run_feature_method(screenshot, template, opts)

    if t_h > screenshot.shape[0] or t_w > screenshot.shape[1]:
        _fail("模板尺寸大于截图")

    # Plain TM_CCOEFF_NORMED template matching (same resolution only).
    threshold = opts['threshold']
    result = cv2.matchTemplate(screenshot, template, cv2.TM_CCOEFF_NORMED)
    _min_val, max_val, _min_loc, max_loc = cv2.minMaxLoc(result)
    if max_val < threshold:
        _fail(f"未找到匹配 (相似度 {max_val:.3f} < {threshold})")
    x, y = int(max_loc[0]), int(max_loc[1])
    box = (x, y, t_w, t_h, x + t_w // 2, y + t_h // 2)
    _emit_match(screenshot, box, opts['crop_output_path'])


if __name__ == "__main__":
    main()