yichael
/
AndroidRemoteController


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
							#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
模板匹配：在截图中查找模板图片的位置
用法1: python image-match.py <screenshot_path> <template_path> [threshold]
用法2: python image-match.py --adb <adb_path> --device <device_id> --screenshot <out_path> --template <template_path> [--threshold 0.8] [--method template|feature]
      用法2 会在 Python 内执行 adb 截图，避免 Node 处理二进制数据导致的兼容性问题
  --method feature: 特征点匹配（优先 RoMa，失败则 ORB + 多尺度模板），不同分辨率可复用
  --method template: 像素模板匹配（TM_CCOEFF_NORMED），仅适合同分辨率
输出: JSON 到 stdout
"""

import sys
import os
import json
import subprocess

# OpenCV and NumPy are hard requirements: if either import fails, report the
# problem as a JSON object on stdout and exit with status 1.
try:
    import cv2
    import numpy as np
except ImportError as e:
    print(json.dumps({"success": False, "error": f"OpenCV 导入失败: {e}。请安装: pip install opencv-python numpy"}))
    sys.exit(1)

# Pillow is optional; HAS_PIL records whether it could be imported.
try:
    from PIL import Image as PILImage
    HAS_PIL = True
except ImportError:
    HAS_PIL = False

# RoMa: when it is installed (python/RoMa, pip install -e .), it is preferred for feature matching.
HAS_ROMA = False
try:
    # Make a RoMa checkout located two directories up, under python/RoMa, importable if it exists.
    _roma_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'python', 'RoMa'))
    if os.path.isdir(_roma_root) and _roma_root not in sys.path:
        sys.path.insert(0, _roma_root)
    from romatch import roma_outdoor
    import torch as _torch_roma
    HAS_ROMA = True
except Exception:
    # Any failure while importing RoMa or torch leaves HAS_ROMA False.
    pass

def run_adb_screencap(adb_path, device, output_path):
    """Take a screenshot through adb and store the raw PNG bytes in a file.

    Runs ``<adb_path> -s <device> exec-out screencap -p`` with a 15 second
    timeout and writes the captured stdout, unmodified, to ``output_path``
    (creating the parent directory when one is given).

    Returns:
        ``(True, output_path)`` on success, otherwise ``(False, message)``
        where ``message`` describes the failure.
    """
    # Forward slashes in the executable path are mapped to the OS separator
    # before the command is handed to the child process.
    command = [adb_path.replace('/', os.sep), '-s', device, 'exec-out', 'screencap', '-p']
    try:
        proc = subprocess.run(command, capture_output=True, timeout=15)
        if proc.returncode != 0:
            failure = proc.stderr or proc.stdout or b''
            return False, failure.decode('utf-8', errors='replace')

        png_bytes = proc.stdout
        if not png_bytes or len(png_bytes) < 100:
            return False, "截图数据为空"

        # NOTE: never rewrite \r\n inside the PNG payload; doing so corrupts
        # the compressed IDAT chunks and makes the image unreadable.
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(output_path, 'wb') as sink:
            sink.write(png_bytes)
    except subprocess.TimeoutExpired:
        return False, "截图超时"
    except Exception as exc:
        return False, str(exc)
    return True, output_path

def load_image(path):
    """Load an image file as a BGR array, trying several decoders in turn.

    Decoders are tried in this order: ``cv2.imdecode`` on the raw file bytes,
    ``cv2.imread`` on the path, and finally Pillow (only when ``HAS_PIL`` is
    true). The fallbacks exist for PNG files OpenCV cannot read directly,
    such as some Android screenshots.

    Args:
        path: Path of the image file.

    Returns:
        The decoded image as returned by OpenCV (3-channel BGR), or ``None``
        when ``path`` is not a regular file, cannot be read, is empty, or
        cannot be decoded by any of the decoders.
    """
    # A directory would pass an existence check and then blow up in open().
    if not os.path.isfile(path):
        return None
    try:
        with open(path, 'rb') as f:
            raw = f.read()
    except OSError:
        return None
    # Zero bytes can never decode; cv2.imdecode raises on an empty buffer
    # instead of returning None, so stop here.
    if not raw:
        return None

    try:
        img = cv2.imdecode(np.frombuffer(raw, dtype=np.uint8), cv2.IMREAD_COLOR)
    except cv2.error:
        img = None
    if img is not None:
        return img

    img = cv2.imread(path)
    if img is not None:
        return img

    if HAS_PIL:
        try:
            # Context manager closes the underlying file handle promptly.
            with PILImage.open(path) as pil_img:
                rgb = np.array(pil_img.convert('RGB'))
            return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
        except Exception:
            # Best effort: Pillow raises several unrelated exception types for
            # unreadable files; all of them mean "could not load".
            pass
    return None


def _roma_params():
    """Read the RoMa tuning parameters from environment variables.

    Reading them from the environment makes repeated tuning runs easy. The
    defaults target the case where the template is a thumbnail that appears
    inside the screenshot.

    Variables and defaults: ``ROMA_COARSE_RES`` (560), ``ROMA_UPSAMPLE_RES``
    (1152), ``ROMA_MIN_MATCHES`` (3), ``ROMA_SAMPLE_NUM`` (20000) and
    ``ROMA_RANSAC_THRESH`` (14.0). A variable that is unset, empty or not
    parseable as a number falls back to its default instead of raising.

    Returns:
        Tuple ``(coarse, upsample, min_matches, sample_num, ransac_thresh)``;
        the first four are ints, the last one is a float.
    """
    def _env_number(name, default, cast):
        raw = os.environ.get(name)
        if raw is None:
            return default
        try:
            return cast(raw.strip())
        except ValueError:
            # A typo in a tuning variable must not crash the matcher.
            return default

    coarse = _env_number("ROMA_COARSE_RES", 560, int)
    upsample = _env_number("ROMA_UPSAMPLE_RES", 1152, int)
    min_m = _env_number("ROMA_MIN_MATCHES", 3, int)
    sample_num = _env_number("ROMA_SAMPLE_NUM", 20000, int)
    ransac = _env_number("ROMA_RANSAC_THRESH", 14.0, float)
    return coarse, upsample, min_m, sample_num, ransac


def match_by_roma(screenshot, template, min_matches=6, device=None):
    """
    Locate ``template`` inside ``screenshot`` using RoMa dense matching.

    Both images are written to temporary PNG files, matched with a
    ``roma_outdoor`` model, and the sampled correspondences are used to fit a
    template-to-screenshot homography with RANSAC. The template corners are
    projected through that homography to obtain the bounding box.

    Tunable through the environment variables ROMA_COARSE_RES,
    ROMA_UPSAMPLE_RES, ROMA_MIN_MATCHES, ROMA_SAMPLE_NUM and
    ROMA_RANSAC_THRESH (see ``_roma_params``).

    Args:
        screenshot: BGR image array to search in.
        template: BGR image array to search for.
        min_matches: Kept for interface compatibility. The value from
            ``_roma_params()`` (ROMA_MIN_MATCHES) always replaces it.
        device: Torch device for the model; when ``None``, CUDA is used if
            available, otherwise the CPU.

    Returns:
        ``(x, y, w, h, center_x, center_y)`` in screenshot pixels, or ``None``
        when RoMa is unavailable, too few matches are sampled, no homography
        is found, or any error occurs during matching.
    """
    if not HAS_ROMA:
        return None
    import tempfile

    t_h, t_w = template.shape[:2]
    sh_h, sh_w = screenshot.shape[:2]
    coarse_res, upsample_res, env_min_matches, sample_num, ransac_thresh = _roma_params()
    # The environment-derived value wins so tuning runs can vary it without code changes.
    min_matches = env_min_matches
    try:
        if _torch_roma.get_float32_matmul_precision() != "highest":
            _torch_roma.set_float32_matmul_precision("highest")
    except Exception:
        # Best effort: a torch build without this API simply keeps its default precision.
        pass

    temp_paths = []
    try:
        if device is None:
            device = _torch_roma.device("cuda" if _torch_roma.cuda.is_available() else "cpu")
        roma_model = roma_outdoor(device=device, coarse_res=coarse_res, upsample_res=upsample_res)

        # Reserve two closed temp files; the model is given file paths, not arrays.
        for _ in range(2):
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                temp_paths.append(tmp.name)
        path_a, path_b = temp_paths

        if HAS_PIL:
            PILImage.fromarray(cv2.cvtColor(screenshot, cv2.COLOR_BGR2RGB)).save(path_a)
            PILImage.fromarray(cv2.cvtColor(template, cv2.COLOR_BGR2RGB)).save(path_b)
        else:
            # cv2.imwrite expects BGR input, so the arrays are written as-is;
            # converting to RGB first would store swapped red/blue channels.
            cv2.imwrite(path_a, screenshot)
            cv2.imwrite(path_b, template)

        warp, certainty = roma_model.match(path_a, path_b, device=device)
        matches, certainty = roma_model.sample(warp, certainty, num=sample_num)
        H_out, W_out = roma_model.get_output_resolution()
        kptsA, kptsB = roma_model.to_pixel_coordinates(matches, H_out, W_out, H_out, W_out)
        kptsA = kptsA.cpu().numpy().astype(np.float32)
        kptsB = kptsB.cpu().numpy().astype(np.float32)
        if kptsA.shape[0] < min_matches:
            return None

        # Keypoints are expressed in the model's output resolution; rescale
        # them to the pixel grids of the original screenshot and template.
        scale_ax = sh_w / float(W_out)
        scale_ay = sh_h / float(H_out)
        scale_bx = t_w / float(W_out)
        scale_by = t_h / float(H_out)
        kptsA_orig = kptsA * np.array([scale_ax, scale_ay])
        kptsB_orig = kptsB * np.array([scale_bx, scale_by])

        # The RANSAC reprojection threshold is deliberately loose to tolerate
        # scaling/perspective distortion (adjustable via ROMA_RANSAC_THRESH).
        H, mask = cv2.findHomography(kptsB_orig, kptsA_orig, cv2.RANSAC, ransac_thresh)
        if H is None:
            return None

        corners = np.float32([[0, 0], [t_w, 0], [t_w, t_h], [0, t_h]]).reshape(-1, 1, 2)
        corners_screen = cv2.perspectiveTransform(corners, H)
        x_coords = corners_screen[:, 0, 0]
        y_coords = corners_screen[:, 0, 1]
        x = int(round(np.min(x_coords)))
        y = int(round(np.min(y_coords)))
        w = int(round(np.max(x_coords) - np.min(x_coords)))
        h = int(round(np.max(y_coords) - np.min(y_coords)))
        # The center is the centroid of the four projected corners.
        center_x = int(round(np.mean(x_coords)))
        center_y = int(round(np.mean(y_coords)))
        return (x, y, w, h, center_x, center_y)
    except Exception:
        # Any failure means "no RoMa result"; the caller falls back to other matchers.
        return None
    finally:
        # Remove each temp file independently so one failure cannot leak the other.
        for tmp_path in temp_paths:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass


def match_by_features(screenshot, template, min_good_matches=6):
    """
    ORB feature matching used as a fallback matcher.

    Detects ORB keypoints in both images, keeps the matches that pass a 0.82
    ratio test, fits a template-to-screenshot homography with RANSAC and
    projects the template corners into the screenshot.

    Returns ``(x, y, w, h, center_x, center_y)`` in screenshot pixels, or
    ``None`` when there are too few keypoints or good matches, or no
    homography could be estimated.
    """
    screen_gray = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
    tpl_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    tpl_height, tpl_width = template.shape[:2]

    detector = cv2.ORB_create(nfeatures=2000)
    tpl_kps, tpl_desc = detector.detectAndCompute(tpl_gray, None)
    scr_kps, scr_desc = detector.detectAndCompute(screen_gray, None)
    if tpl_desc is None or scr_desc is None:
        return None
    if len(tpl_kps) < 4 or len(scr_kps) < 4:
        return None

    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
    candidate_pairs = matcher.knnMatch(tpl_desc, scr_desc, k=2)
    # Ratio test: keep a match only when it is clearly better than the runner-up.
    good = [
        pair[0]
        for pair in candidate_pairs
        if len(pair) == 2 and pair[0].distance < 0.82 * pair[1].distance
    ]
    if len(good) < min_good_matches:
        return None

    tpl_points = np.float32([tpl_kps[match.queryIdx].pt for match in good]).reshape(-1, 1, 2)
    scr_points = np.float32([scr_kps[match.trainIdx].pt for match in good]).reshape(-1, 1, 2)
    homography, _inliers = cv2.findHomography(tpl_points, scr_points, cv2.RANSAC, 5.0)
    if homography is None:
        return None

    # Project the four template corners into the screenshot; the centroid of
    # the projected corners serves as the center point.
    tpl_corners = np.float32(
        [[0, 0], [tpl_width, 0], [tpl_width, tpl_height], [0, tpl_height]]
    ).reshape(-1, 1, 2)
    projected = cv2.perspectiveTransform(tpl_corners, homography)
    xs = projected[:, 0, 0]
    ys = projected[:, 0, 1]
    left = int(round(np.min(xs)))
    top = int(round(np.min(ys)))
    width = int(round(np.max(xs) - np.min(xs)))
    height = int(round(np.max(ys) - np.min(ys)))
    center_x = int(round(np.mean(xs)))
    center_y = int(round(np.mean(ys)))
    return (left, top, width, height, center_x, center_y)


def multi_scale_template_match(screenshot, template, threshold=0.50, scale_min=0.4, scale_max=1.65):
    """
    Multi-scale template matching on grayscale images.

    The template is resized over a range of scale factors and each resized
    version is matched against the screenshot with TM_CCOEFF_NORMED, which
    lets simple icons or outlines be found across resolutions.

    scale_min, scale_max: bounds of the scale factor range; a wide range such
    as 0.08 to 2.0 also covers small thumbnails inside the screenshot.

    Returns ``(x, y, w, h, center_x, center_y)`` for the best match whose
    score is strictly above ``threshold``, or ``None`` when there is none.
    """
    screen_gray = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
    tpl_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    screen_h, screen_w = screenshot.shape[:2]
    tpl_h, tpl_w = template.shape[:2]

    # At most roughly 38 steps across the range, never finer than 0.03.
    stride = max(0.03, (scale_max - scale_min) / 38.0)
    top_score = threshold
    top_box = None
    for factor in np.arange(scale_min, scale_max + stride * 0.5, stride):
        # Resized templates are never smaller than 8 pixels per side.
        cand_w = max(8, int(round(tpl_w * factor)))
        cand_h = max(8, int(round(tpl_h * factor)))
        if cand_h <= screen_h and cand_w <= screen_w:
            scaled = cv2.resize(tpl_gray, (cand_w, cand_h), interpolation=cv2.INTER_AREA)
            scores = cv2.matchTemplate(screen_gray, scaled, cv2.TM_CCOEFF_NORMED)
            _lowest, score, _lowest_at, location = cv2.minMaxLoc(scores)
            if score > top_score:
                left, top = int(location[0]), int(location[1])
                top_score = score
                top_box = (left, top, cand_w, cand_h, left + cand_w // 2, top + cand_h // 2)
    return top_box


def _exit_with_error(message):
    """Print a JSON failure object carrying ``message`` and exit with status 1."""
    print(json.dumps({"success": False, "error": message}))
    sys.exit(1)


def _exit_with_match(box):
    """Print a JSON success object for an ``(x, y, w, h, center_x, center_y)`` box and exit with status 0."""
    x, y, w, h, center_x, center_y = box
    print(json.dumps({
        "success": True,
        "x": x,
        "y": y,
        "width": w,
        "height": h,
        "center_x": center_x,
        "center_y": center_y
    }))
    sys.exit(0)


def _parse_float_arg(name, raw):
    """Convert a command-line value to float, or exit with a JSON error when it is not numeric."""
    try:
        return float(raw)
    except ValueError:
        _exit_with_error(f"参数 {name} 不是有效数字: {raw}")


def _roma_with_retries(screenshot, template):
    """Run RoMa matching once with the current settings, then retry with two backup parameter sets."""
    box = match_by_roma(screenshot, template, min_matches=4)
    if box is not None:
        return box

    # match_by_roma takes its parameters from the environment, so the backup
    # settings are injected there and the previous values restored afterwards.
    names = ('ROMA_COARSE_RES', 'ROMA_UPSAMPLE_RES', 'ROMA_MIN_MATCHES')
    saved = {name: os.environ.get(name) for name in names}
    try:
        for co, up, mn in [(672, 1120, 4), (448, 864, 2)]:
            os.environ['ROMA_COARSE_RES'] = str(co)
            os.environ['ROMA_UPSAMPLE_RES'] = str(up)
            os.environ['ROMA_MIN_MATCHES'] = str(mn)
            box = match_by_roma(screenshot, template, min_matches=mn)
            if box is not None:
                break
    finally:
        for name, value in saved.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
    return box


def _feature_match(screenshot, template, threshold, scale_min, scale_max):
    """Try RoMa, then ORB, then multi-scale template matching; return the first box found or None."""
    # 1) RoMa dense matching (only when installed).
    if HAS_ROMA:
        box = _roma_with_retries(screenshot, template)
        if box is not None:
            return box

    # 2) Fallback: ORB feature matching.
    box = match_by_features(screenshot, template)
    if box is not None:
        return box

    # 3) Fallback: multi-scale template matching with a relaxed threshold and
    #    a wider scale range, to cope with thumbnails inside the screenshot.
    t_h, t_w = template.shape[:2]
    fallback_threshold = min(threshold, 0.50)
    scale_min_use = min(scale_min, 0.08)
    box = multi_scale_template_match(
        screenshot, template,
        threshold=fallback_threshold, scale_min=scale_min_use, scale_max=scale_max)
    if box is None and (t_w > 1.3 * t_h or t_h > 1.3 * t_w):
        # Clearly non-square template: retry with its central square crop.
        t_s = min(t_w, t_h)
        cx, cy = t_w // 2, t_h // 2
        y0, y1 = max(0, cy - t_s // 2), min(t_h, cy + t_s // 2)
        x0, x1 = max(0, cx - t_s // 2), min(t_w, cx + t_s // 2)
        if y1 > y0 and x1 > x0:
            box = multi_scale_template_match(
                screenshot, template[y0:y1, x0:x1],
                threshold=fallback_threshold, scale_min=scale_min_use, scale_max=scale_max)
    return box


def main():
    """Command-line entry point: parse arguments, run the matcher, print JSON and exit.

    Two invocation styles are supported:

    * positional: ``<screenshot_path> <template_path> [threshold] [feature|template]``
    * flags, selected when the first argument is ``--adb``: ``--adb``,
      ``--device``, ``--screenshot``, ``--template``, ``--threshold``,
      ``--method``, ``--scale-min``, ``--scale-max``. In this style the
      screenshot is first captured from the device into ``--screenshot``.

    Every outcome is reported as a single JSON object on stdout. The process
    exits with status 0 when a match was found and 1 otherwise, including for
    invalid arguments such as a non-numeric threshold or scale.
    """
    argv = sys.argv
    threshold = 0.8
    method = 'feature'  # feature = feature matching (cross-resolution), template = pixel template matching
    scale_min, scale_max = 0.4, 1.65

    if len(argv) >= 2 and argv[1] == '--adb':
        # Flag style. Unknown tokens and a trailing flag without a value are
        # skipped; when a flag is repeated the last value wins.
        value_flags = ('--adb', '--device', '--screenshot', '--template',
                       '--threshold', '--method', '--scale-min', '--scale-max')
        given = {}
        i = 1
        while i < len(argv):
            if argv[i] in value_flags and i + 1 < len(argv):
                given[argv[i]] = argv[i + 1]
                i += 2
            else:
                i += 1

        adb_path = given.get('--adb')
        device = given.get('--device')
        screenshot_path = given.get('--screenshot')
        template_path = given.get('--template')
        if '--threshold' in given:
            threshold = _parse_float_arg('--threshold', given['--threshold'])
        if '--method' in given:
            method = (given['--method'] or 'feature').strip().lower()
            if method not in ('template', 'feature'):
                method = 'feature'
        if '--scale-min' in given:
            scale_min = _parse_float_arg('--scale-min', given['--scale-min'])
        if '--scale-max' in given:
            scale_max = _parse_float_arg('--scale-max', given['--scale-max'])

        if not (adb_path and device and screenshot_path and template_path):
            _exit_with_error("缺少 --adb/--device/--screenshot/--template 参数")
        ok, msg = run_adb_screencap(adb_path, device, screenshot_path)
        if not ok:
            _exit_with_error(f"截图失败: {msg}")
    else:
        # Positional style.
        if len(argv) < 3:
            _exit_with_error("用法: image-match.py <screenshot_path> <template_path> [threshold] [method=feature|template]")
        screenshot_path = argv[1]
        template_path = argv[2]
        if len(argv) > 3:
            threshold = _parse_float_arg('threshold', argv[3])
        if len(argv) > 4 and argv[4].lower() in ('template', 'feature'):
            method = argv[4].lower()

    if not os.path.exists(screenshot_path):
        _exit_with_error(f"截图文件不存在: {screenshot_path}")
    if not os.path.exists(template_path):
        _exit_with_error(f"模板文件不存在: {template_path}")

    screenshot = load_image(screenshot_path)
    template = load_image(template_path)
    if screenshot is None:
        _exit_with_error("无法读取截图（文件损坏或格式不支持）")
    if template is None:
        _exit_with_error(f"无法读取模板: {template_path}")

    t_h, t_w = template.shape[:2]
    if method == 'template' and (t_h > screenshot.shape[0] or t_w > screenshot.shape[1]):
        _exit_with_error("模板尺寸大于截图")

    if method == 'feature':
        box = _feature_match(screenshot, template, threshold, scale_min, scale_max)
        if box is None:
            _exit_with_error("RoMa/特征点与多尺度模板均未匹配（可检查模板是否在画面中或使用 --method template）")
        _exit_with_match(box)

    # Plain TM_CCOEFF_NORMED template matching (recommended only when both
    # images share the same resolution).
    result = cv2.matchTemplate(screenshot, template, cv2.TM_CCOEFF_NORMED)
    _min_val, max_val, _min_loc, max_loc = cv2.minMaxLoc(result)
    if max_val < threshold:
        _exit_with_error(f"未找到匹配 (相似度 {max_val:.3f} < {threshold})")

    x, y = int(max_loc[0]), int(max_loc[1])
    _exit_with_match((x, y, t_w, t_h, x + t_w // 2, y + t_h // 2))

if __name__ == "__main__":
    main()