Преглед на файлове

修复图像中心点特征点匹配在缩放时识别不到的问题

yichael преди 2 месеца
родител
ревизия
2dba5a64fe

+ 10 - 3
nodejs/ef-compiler/actions/fun/fun-parser.js

@@ -141,11 +141,13 @@ function parse(action, parseContext) {
       parsed.region = action.inVars?.[1] ?? action.region
       parsed.variable = action.outVars && action.outVars.length > 0 ? extractVarName(action.outVars[0]) : (action.variable ? extractVarName(action.variable) : undefined)
       break
-    case 'img-center-point-location':
-      parsed.inVars = action.inVars && Array.isArray(action.inVars) ? action.inVars.map(v => extractVarName(v)) : []
+    case 'img-center-point-location': {
+      parsed.inVars = action.inVars && Array.isArray(action.inVars) ? action.inVars.map((v, i) => (i === 1 && Array.isArray(v) ? v : extractVarName(v))) : []
       parsed.template = action.inVars?.[0] ?? action.template
+      parsed.scaleRange = Array.isArray(action.inVars?.[1]) ? action.inVars[1] : undefined
       parsed.variable = action.outVars && action.outVars.length > 0 ? extractVarName(action.outVars[0]) : (action.variable ? extractVarName(action.variable) : undefined)
       break
+    }
     case 'img-cropping':
       parsed.inVars = action.inVars && Array.isArray(action.inVars) ? action.inVars.map(v => extractVarName(v)) : []
       parsed.area = action.inVars?.[0] ?? action.area
@@ -324,8 +326,13 @@ async function run(actionType, action, ctx, device, folderPath) {
       }
       if (!templatePath) templatePath = action.template
       if (!templatePath) return { success: false, error: '缺少模板图片路径' }
+      let scaleRange = action.inVars?.[1] ?? action.scaleRange
+      if (!Array.isArray(scaleRange) || scaleRange.length < 2) return { success: false, error: 'img-center-point-location 必须填写 inVars[1] 缩放比范围 [min, max],如 [0.2, 1.6]' }
+      const minS = Number(scaleRange[0])
+      const maxS = Number(scaleRange[1])
+      if (Number.isNaN(minS) || Number.isNaN(maxS) || minS >= maxS) return { success: false, error: 'img-center-point-location inVars[1] 缩放比范围无效,需为两个数字且 min < max' }
       if (!device) return { success: false, error: '缺少设备 ID,无法自动获取截图' }
-      const result = await executeImgCenterPointLocation({ device, template: templatePath, folderPath })
+      const result = await executeImgCenterPointLocation({ device, template: templatePath, folderPath, scaleRange: [minS, maxS] })
       if (!result.success) return { success: false, error: `图像中心点定位失败: ${result.error}` }
       const outputVarName = action.outVars?.[0] != null ? extractVarName(String(action.outVars[0]).trim()) : (action.variable ? extractVarName(action.variable) : null)
       if (outputVarName) {

+ 13 - 7
nodejs/ef-compiler/actions/fun/img-center-point-location.js

@@ -18,8 +18,8 @@ const imageMatchScriptPath = path.join(projectRoot, 'python', 'scripts', 'image-
 const tagName = 'img-center-point-location'
 
 const schema = {
-  description: '在屏幕截图中查找模板图片的位置并返回中心点坐标(可用于定位/点击)。',
-  inputs: { template: '模板图片路径(相对于工作流目录)', variable: '输出变量名(保存中心点坐标)' },
+  description: '在屏幕截图中查找模板图片的位置并返回中心点坐标(可用于定位/点击)。inVars[1] 为缩放比范围 [min, max],必填。',
+  inputs: { template: '模板图片路径', scaleRange: '缩放比范围数组 [min, max],如 [0.2, 1.6]', variable: '输出变量名' },
   outputs: { variable: '中心点坐标(JSON 字符串格式,如:{"x":123,"y":456})' },
 }
 
@@ -35,9 +35,13 @@ function getPythonPath() {
   return 'python'
 }
 
-/** 在设备截图中匹配模板,返回坐标与中心点 */
-function matchImageAndGetCoordinate(device, imagePath) {
+/** 在设备截图中匹配模板,返回坐标与中心点。scaleRange 为 [min, max] 缩放比范围,必填。 */
+function matchImageAndGetCoordinate(device, imagePath, scaleRange) {
   if (!imagePath || typeof imagePath !== 'string') return { success: false, error: '模板路径为空' }
+  if (!Array.isArray(scaleRange) || scaleRange.length < 2) return { success: false, error: '缩放比范围 scaleRange 必填,且为 [min, max] 数组,如 [0.2, 1.6]' }
+  const minScale = Number(scaleRange[0])
+  const maxScale = Number(scaleRange[1])
+  if (Number.isNaN(minScale) || Number.isNaN(maxScale) || minScale >= maxScale) return { success: false, error: '缩放比范围无效,需为两个数字且 min < max' }
   const templatePath = path.isAbsolute(imagePath) ? imagePath : path.resolve(projectRoot, imagePath)
   const ts = Date.now()
   const screenshotPath = path.join(os.tmpdir(), `ef-screenshot-${ts}.png`)
@@ -48,7 +52,8 @@ function matchImageAndGetCoordinate(device, imagePath) {
   const adbPath = config.adbPath?.path
     ? (path.isAbsolute(config.adbPath.path) ? config.adbPath.path : path.resolve(projectRoot, config.adbPath.path))
     : path.join(projectRoot, 'lib', 'scrcpy-adb', process.platform === 'win32' ? 'adb.exe' : 'adb')
-  const r = spawnSync(pythonPath, [imageMatchScriptPath, '--adb', adbPath, '--device', device, '--screenshot', screenshotPath.replace(/\\/g, '/'), '--template', templateCopyPath.replace(/\\/g, '/'), '--method', 'feature'], {
+  const args = [imageMatchScriptPath, '--adb', adbPath, '--device', device, '--screenshot', screenshotPath.replace(/\\/g, '/'), '--template', templateCopyPath.replace(/\\/g, '/'), '--method', 'feature', '--scale-min', String(minScale), '--scale-max', String(maxScale)]
+  const r = spawnSync(pythonPath, args, {
     encoding: 'utf-8',
     timeout: 20000,
     env: { ...process.env, PYTHONIOENCODING: 'utf-8' },
@@ -67,15 +72,16 @@ function matchImageAndGetCoordinate(device, imagePath) {
   }
 }
 
-async function executeImgCenterPointLocation({ device, template, folderPath }) {
+async function executeImgCenterPointLocation({ device, template, folderPath, scaleRange }) {
   if (!device) return { success: false, error: '缺少设备 ID,无法自动获取截图' }
   if (!template || typeof template !== 'string') return { success: false, error: '缺少模板图片路径' }
+  if (!Array.isArray(scaleRange) || scaleRange.length < 2) return { success: false, error: 'img-center-point-location 必须填写 inVars[1] 缩放比范围 [min, max],如 [0.2, 1.6]' }
   const baseDir = folderPath && typeof folderPath === 'string' ? folderPath : projectRoot
   // 绝对路径或带盘符的保持原样;已含子路径(如 tmp/pic0.png)相对 baseDir;否则视为 resources 下文件名
   const isAbsoluteOrDrive = template.startsWith('/') || template.includes(':')
   const hasSubPath = template.includes('/') || template.includes(path.sep)
   const templatePath = isAbsoluteOrDrive ? template : (hasSubPath ? path.join(baseDir, template) : path.join(baseDir, 'resources', template))
-  const result = matchImageAndGetCoordinate(device, templatePath)
+  const result = matchImageAndGetCoordinate(device, templatePath, scaleRange)
   if (!result.success) return { success: false, error: result.error }
   const center = result.clickPosition || { x: result.coordinate.x + result.coordinate.width / 2, y: result.coordinate.y + result.coordinate.height / 2 }
   return { success: true, center, coordinate: result.coordinate }

+ 8 - 0
nodejs/ef-compiler/sequence-runner.js

@@ -93,6 +93,9 @@ async function executeActionSequence(
         } else {
           count = Math.max(0, parseInt(count, 10) || 0)
         }
+        const timesVarKey = action.variable != null ? String(action.variable).replace(/^\{|\}$/g, '').trim() : null
+        const originalDeclaredForTimes = state.declaredVariableNames
+        if (timesVarKey) state.declaredVariableNames = (originalDeclaredForTimes || []).concat(timesVarKey)
         for (let i = 0; i < count; i++) {
           if (shouldStop && shouldStop()) return { success: false, error: 'Execution stopped', completedSteps }
           if (action.variable) variableContext[action.variable.replace(/^\{|\}$/g, '').trim()] = i
@@ -102,10 +105,14 @@ async function executeActionSequence(
             completedSteps += result.completedSteps || 0
           }
         }
+        if (timesVarKey) state.declaredVariableNames = originalDeclaredForTimes
       } else {
         const items = Array.isArray(action.items) ? action.items : []
         const indexKey = action.indexVariable != null ? String(action.indexVariable).replace(/^\{|\}$/g, '').trim() : null
         const variableKey = action.variable != null ? String(action.variable).replace(/^\{|\}$/g, '').trim() : null
+        const originalDeclared = state.declaredVariableNames
+        const forLocals = [indexKey, variableKey].filter(Boolean)
+        if (forLocals.length > 0) state.declaredVariableNames = (originalDeclared || []).concat(forLocals)
         for (let i = 0; i < items.length; i++) {
           if (shouldStop && shouldStop()) return { success: false, error: 'Execution stopped', completedSteps }
           if (indexKey !== null) variableContext[indexKey] = i
@@ -116,6 +123,7 @@ async function executeActionSequence(
             completedSteps += result.completedSteps || 0
           }
         }
+        if (forLocals.length > 0) state.declaredVariableNames = originalDeclared
       }
       continue
     }

+ 23 - 9
python/scripts/image-match.py

@@ -93,9 +93,10 @@ def _numpy_bgr_to_torch_rgb(img_bgr):
     return torch.from_numpy(t).float().div(255.0)
 
 
-def match_by_lightglue(screenshot, template, min_matches=8, device='cpu'):
+def match_by_lightglue(screenshot, template, min_matches=6, device='cpu'):
     """
     使用 LightGlue + SuperPoint 做特征匹配,在截图中找模板位置。
+    参数已调优以提高难图(如缩略图)匹配率:更多特征点、关闭裁剪/早停、放宽匹配与 RANSAC。
     返回 (x, y, w, h, center_x, center_y) 或 None。
     """
     if not HAS_LIGHTGLUE:
@@ -104,8 +105,13 @@ def match_by_lightglue(screenshot, template, min_matches=8, device='cpu'):
     try:
         img0 = _numpy_bgr_to_torch_rgb(screenshot)
         img1 = _numpy_bgr_to_torch_rgb(template)
-        extractor = SuperPoint(max_num_keypoints=2048).eval().to(device)
-        matcher = LightGlue(features='superpoint').eval().to(device)
+        extractor = SuperPoint(max_num_keypoints=4096).eval().to(device)
+        matcher = LightGlue(
+            features='superpoint',
+            depth_confidence=-1,
+            width_confidence=-1,
+            filter_threshold=0.05,
+        ).eval().to(device)
         feats0, feats1, matches01 = match_pair(extractor, matcher, img0, img1, device=device)
         matches = matches01.get('matches')
         if matches is None or matches.shape[0] < min_matches:
@@ -116,7 +122,7 @@ def match_by_lightglue(screenshot, template, min_matches=8, device='cpu'):
         idx1 = matches[:, 1]
         pts_screen = kp0[idx0].cpu().numpy().astype(np.float32)
         pts_template = kp1[idx1].cpu().numpy().astype(np.float32)
-        H, mask = cv2.findHomography(pts_template, pts_screen, cv2.RANSAC, 5.0)
+        H, mask = cv2.findHomography(pts_template, pts_screen, cv2.RANSAC, 8.0)
         if H is None:
             return None
         corners = np.float32([[0, 0], [t_w, 0], [t_w, t_h], [0, t_h]]).reshape(-1, 1, 2)
@@ -180,9 +186,10 @@ def match_by_features(screenshot, template, min_good_matches=8):
     return (x, y, w, h, center_x, center_y)
 
 
-def multi_scale_template_match(screenshot, template, threshold=0.65):
+def multi_scale_template_match(screenshot, template, threshold=0.65, scale_min=0.4, scale_max=1.65):
     """
     多尺度模板匹配:对模板做多种缩放后在截图中匹配,适配不同分辨率(如简单图标、轮廓)。
+    scale_min, scale_max: 缩放比范围,如 0.2~1.6 可匹配缩略图。
     返回 (x, y, w, h, center_x, center_y) 或 None。
     """
     gray_screen = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
@@ -191,8 +198,8 @@ def multi_scale_template_match(screenshot, template, threshold=0.65):
     t_h, t_w = template.shape[:2]
     best = None
     best_val = threshold
-    # 从 0.4 到 1.6 倍缩放,步长约 0.15,保证缩放后不超出截图
-    for scale in np.arange(0.4, 1.65, 0.12):
+    step = max(0.05, (scale_max - scale_min) / 12.0)
+    for scale in np.arange(scale_min, scale_max + step * 0.5, step):
         w = max(8, int(round(t_w * scale)))
         h = max(8, int(round(t_h * scale)))
         if h > sh or w > sw:
@@ -216,9 +223,10 @@ def main():
     method = 'feature'  # feature=特征点匹配(跨分辨率), template=像素模板匹配
     adb_path = None
     device = None
+    scale_min, scale_max = 0.4, 1.65
 
     if len(sys.argv) >= 2 and sys.argv[1] == '--adb':
-        # 用法2:--adb --device --screenshot --template
+        # 用法2:--adb --device --screenshot --template [--scale-min 0.2] [--scale-max 1.6]
         i = 1
         while i < len(sys.argv):
             if sys.argv[i] == '--adb' and i + 1 < len(sys.argv):
@@ -241,6 +249,12 @@ def main():
                 if method not in ('template', 'feature'):
                     method = 'feature'
                 i += 2
+            elif sys.argv[i] == '--scale-min' and i + 1 < len(sys.argv):
+                scale_min = float(sys.argv[i + 1])
+                i += 2
+            elif sys.argv[i] == '--scale-max' and i + 1 < len(sys.argv):
+                scale_max = float(sys.argv[i + 1])
+                i += 2
             else:
                 i += 1
         if adb_path and device and screenshot_path and template_path:
@@ -320,7 +334,7 @@ def main():
             sys.exit(0)
         # 3) 回退:多尺度模板匹配,适合简单图标/轮廓(如心形、纯色图标),跨分辨率
         fallback_threshold = min(threshold, 0.65)
-        scale_result = multi_scale_template_match(screenshot, template, threshold=fallback_threshold)
+        scale_result = multi_scale_template_match(screenshot, template, threshold=fallback_threshold, scale_min=scale_min, scale_max=scale_max)
         if scale_result is not None:
             x, y, w, h, center_x, center_y = scale_result
             output = {

+ 27 - 3
python/scripts/ocr-onnx.py

@@ -20,6 +20,21 @@ def box_center(box):
     ys = [p[1] for p in box]
     return (sum(xs) / len(xs), sum(ys) / len(ys))
 
+
+def normalize_for_match(s):
+    """规范化后用于匹配:去空格、全角括号/数字转半角,便于 OCR 变体(如 下一步(1)、下一步(1))都能匹配 下一步"""
+    if not s:
+        return ''
+    s = (s or '').strip().replace(' ', '').replace('\u3000', '')
+    # 全角括号、数字 → 半角
+    t = []
+    for c in s:
+        if c in ('(', '[', '{'): t.append('(')
+        elif c in (')', ']', '}'): t.append(')')
+        elif '\uff10' <= c <= '\uff19': t.append(chr(ord(c) - 0xfee0))
+        else: t.append(c)
+    return ''.join(t)
+
 def main():
     ap = argparse.ArgumentParser()
     ap.add_argument('--image', required=True, help='图片路径(绝对或相对)')
@@ -67,13 +82,20 @@ def main():
             print(json.dumps(out, ensure_ascii=False))
             sys.exit(1)
 
-        model = ONNXPaddleOcr(use_angle_cls=True, use_gpu=False)
+        model = ONNXPaddleOcr(
+            use_angle_cls=True,
+            use_gpu=False,
+            det_db_thresh=0.2,
+            det_db_box_thresh=0.45,
+            drop_score=0.3,
+        )
         result = model.ocr(img)
 
         find_text = (args.find_text or '').strip()
+        find_norm = normalize_for_match(find_text)
 
         if find_text:
-            # 模式2:查找文字,返回中心点
+            # 模式2:查找文字,返回中心点(支持包含匹配与规范化匹配,如 "下一步" 可匹配 "下一步(1)"、"下一步(1)")
             if not result or not result[0]:
                 out = {'success': False, 'error': f'图中未识别到文字,或未找到: "{find_text}"'}
                 print(json.dumps(out, ensure_ascii=False))
@@ -83,7 +105,9 @@ def main():
                 if not rec:
                     continue
                 text = (rec[0] or '').strip()
-                if find_text in text or text == find_text:
+                text_norm = normalize_for_match(text)
+                if (find_text in text or text == find_text or
+                        (find_norm and (find_norm in text_norm or text_norm == find_norm))):
                     center = box_center(box)
                     if center is not None:
                         cx, cy = int(round(center[0])), int(round(center[1]))

+ 9 - 13
static/process/GenerateNote/process.json

@@ -10,11 +10,8 @@
       "健康减脂餐 轻食沙拉 低卡高蛋白 摆盘",
       "居家有氧运动 女生健身 燃脂操"
     ],
-    "download-path": "tmp",
     "img-url-arr": [],
-    "idx": 0,
-    "pos": "",
-    "thumbRect": ""
+    "pos": ""
   },
   "execute": [
     {
@@ -109,7 +106,7 @@
     },
     {
       "type": "img-center-point-location",
-      "inVars": ["添加笔记.png"],
+      "inVars": ["添加笔记.png", [0.2, 1.6]],
       "outVars": ["{pos}"]
     },
     {
@@ -134,8 +131,7 @@
       "body": [
         {
           "type": "img-center-point-location",
-          "method": "template",
-          "inVars": ["tmp/pic{idx}.png"],
+          "inVars": ["tmp/pic{idx}.png", [0.2, 1.6]],
           "outVars": ["{pos}"]
         },
         {
@@ -145,7 +141,7 @@
         },
         {
           "type": "img-center-point-location",
-          "inVars": ["选中图片.png"],
+          "inVars": ["选中图片.png", [0.2, 1.6]],
           "outVars": ["{pos}"]
         },
         {
@@ -154,11 +150,11 @@
           "inVars": ["{pos}"]
         },
         {
-					"type": "adb",
-					"method": "keyevent",
-					"inVars": ["4"],
-					"outVars": []
-				}
+          "type": "adb",
+          "method": "keyevent",
+          "inVars": ["4"],
+          "outVars": []
+        }
       ]
     },
     {

BIN
static/process/GenerateNote/tmp/pic0.png


BIN
static/process/GenerateNote/tmp/pic1.png